diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 45b5b2979a562..c6c2e50ce76a3 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -1210,6 +1210,8 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { return SelectPatternResult::isMinOrMax( matchSelectPattern(Cur, LHS, RHS).Flavor); } + if (isAnyOfRecurrenceKind(getRecurrenceKind())) + return isa(Cur); // Recognize a call to the llvm.fmuladd intrinsic. if (isFMulAddIntrinsic(Cur)) return true; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b64bac329e05d..fbc1e4cbcfb58 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5834,6 +5834,14 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I, Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(RK); BaseCost = TTI.getMinMaxReductionCost(MinMaxID, VectorTy, RdxDesc.getFastMathFlags(), CostKind); + } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { + VectorType *BoolTy = VectorType::get( + Type::getInt1Ty(VectorTy->getContext()), VectorTy->getElementCount()); + BaseCost = + TTI.getArithmeticReductionCost(Instruction::Or, BoolTy, + RdxDesc.getFastMathFlags(), CostKind) + + TTI.getArithmeticInstrCost(Instruction::Or, BoolTy->getScalarType(), + CostKind); } else { BaseCost = TTI.getArithmeticReductionCost( RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind); @@ -9666,10 +9674,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); RecurKind Kind = RdxDesc.getRecurrenceKind(); - assert( - !RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && - !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) && - "AnyOf and FindLast reductions are not allowed for in-loop reductions"); + assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) && + "FindLast reductions are not allowed for in-loop reductions"); // Collect the chain of "link" recipes for the reduction starting at PhiR. SetVector Worklist; @@ -9738,6 +9744,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( CurrentLinkI->getFastMathFlags()); LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator()); VecOp = FMulRecipe; + } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) { + assert(isa(CurrentLink) && + "must be a select recipe"); + VecOp = CurrentLink->getOperand(0); + Kind = RecurKind::Or; } else { if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { if (isa(CurrentLink)) { @@ -9902,10 +9913,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // selected if the negated condition is true in any iteration. if (Select->getOperand(1) == PhiR) Cmp = Builder.createNot(Cmp); - VPValue *Or = Builder.createOr(PhiR, Cmp); - Select->getVPSingleValue()->replaceAllUsesWith(Or); - // Delete Select now that it has invalid types. - ToDelete.push_back(Select); + + if (PhiR->isInLoop() && MinVF.isVector()) { + auto *Reduction = cast( + *find_if(PhiR->users(), IsaPred)); + Reduction->setOperand(1, Cmp); + } else { + VPValue *Or = Builder.createOr(PhiR, Cmp); + Select->getVPSingleValue()->replaceAllUsesWith(Or); + // Delete Select now that it has invalid types. + ToDelete.push_back(Select); + } // Convert the reduction phi to operate on bools. PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index cdef7972f3bdc..f9ef2b4f1b593 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -668,10 +668,10 @@ Value *VPInstruction::generate(VPTransformState &State) { // Create the reduction after the loop. Note that inloop reductions create // the target reduction in the loop using a Reduction recipe. - if ((State.VF.isVector() || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || - RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) && - !PhiR->isInLoop()) { + if (((State.VF.isVector() || + RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) && + !PhiR->isInLoop()) || + RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { // TODO: Support in-order reductions based on the recurrence descriptor. // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. @@ -2302,7 +2302,7 @@ void VPReductionRecipe::execute(VPTransformState &State) { Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true); RecurKind Kind = getRecurrenceKind(); assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) && - "In-loop AnyOf reductions aren't currently supported"); + "In-loop AnyOf reduction should use Or reduction recipe"); // Propagate the fast-math flags carried by the underlying instruction. IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); State.Builder.setFastMathFlags(getFastMathFlags()); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll index d3baf0e4dce09..ae9f6de94bcb4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll @@ -1924,7 +1924,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 @@ -1932,15 +1932,14 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], splat (i32 3) -; IF-EVL-NEXT: [[TMP15:%.*]] = or [[VEC_PHI]], [[TMP14]] -; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i1( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP14]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP19]] = or i1 [[TMP15]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1978,18 +1977,18 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_LOAD]], splat (i32 3) -; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] +; NO-VP-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; NO-VP-NEXT: [[TMP12]] = or i1 [[TMP10]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; NO-VP: middle.block: -; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) ; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] ; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2051,7 +2050,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0 @@ -2059,15 +2058,14 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt [[VP_OP_LOAD]], splat (float 3.000000e+00) -; IF-EVL-NEXT: [[TMP15:%.*]] = or [[VEC_PHI]], [[TMP14]] -; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i1( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, [[TMP14]], splat (i1 true), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP19]] = or i1 [[TMP15]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2105,18 +2103,18 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] ; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 ; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt [[WIDE_LOAD]], splat (float 3.000000e+00) -; NO-VP-NEXT: [[TMP10]] = or [[VEC_PHI]], [[TMP9]] +; NO-VP-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; NO-VP-NEXT: [[TMP12]] = or i1 [[TMP10]], [[VEC_PHI]] ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; NO-VP: middle.block: -; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP10]]) ; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] ; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]] ; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll new file mode 100644 index 0000000000000..6ff54fe6c5753 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-OUTLOOP +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefix=CHECK-INLOOP + +; Check that a VPBlendRecipe in the chain is handled correctly, i.e. there's +; another phi in between the recurrence phi and the select. + +define i32 @select_icmp_switch(i32 %n, i32 %case, ptr %a, ptr %b, i32 %anyof) { +; CHECK-OUTLOOP-LABEL: define i32 @select_icmp_switch( +; CHECK-OUTLOOP-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], i32 [[ANYOF:%.*]]) { +; CHECK-OUTLOOP-NEXT: [[ENTRY:.*]]: +; CHECK-OUTLOOP-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-OUTLOOP-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK-OUTLOOP: [[FOR_BODY_PREHEADER]]: +; CHECK-OUTLOOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-OUTLOOP: [[VECTOR_PH]]: +; CHECK-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CASE]], i64 0 +; CHECK-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-OUTLOOP-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) +; CHECK-OUTLOOP-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-OUTLOOP: [[VECTOR_BODY]]: +; CHECK-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] +; CHECK-OUTLOOP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-OUTLOOP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]] +; CHECK-OUTLOOP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 +; CHECK-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-OUTLOOP-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 -1) +; CHECK-OUTLOOP-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-OUTLOOP-NEXT: [[TMP6:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP5]] +; CHECK-OUTLOOP-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP0]], <4 x i1> [[VEC_PHI]], <4 x i1> [[TMP6]] +; CHECK-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-OUTLOOP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-OUTLOOP-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-OUTLOOP: [[MIDDLE_BLOCK]]: +; CHECK-OUTLOOP-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]]) +; CHECK-OUTLOOP-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]] +; CHECK-OUTLOOP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i32 [[ANYOF]], i32 0 +; CHECK-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-OUTLOOP-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-OUTLOOP: [[SCALAR_PH]]: +; CHECK-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-OUTLOOP-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-OUTLOOP: [[FOR_BODY]]: +; CHECK-OUTLOOP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-OUTLOOP-NEXT: [[RDX_PHI:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-OUTLOOP-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]] +; CHECK-OUTLOOP-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1 +; CHECK-OUTLOOP-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1 +; CHECK-OUTLOOP-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [ +; CHECK-OUTLOOP-NEXT: i32 0, label %[[SW_BB0]] +; CHECK-OUTLOOP-NEXT: i32 1, label %[[SW_BB1:.*]] +; CHECK-OUTLOOP-NEXT: ] +; CHECK-OUTLOOP: [[SW_BB0]]: +; CHECK-OUTLOOP-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[ANYOF]] +; CHECK-OUTLOOP-NEXT: br label %[[FOR_INC]] +; CHECK-OUTLOOP: [[SW_BB1]]: +; CHECK-OUTLOOP-NEXT: br label %[[FOR_INC]] +; CHECK-OUTLOOP: [[FOR_INC]]: +; CHECK-OUTLOOP-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[RDX_PHI]], %[[SW_BB1]] ] +; CHECK-OUTLOOP-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 +; CHECK-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-OUTLOOP: [[FOR_END_LOOPEXIT]]: +; CHECK-OUTLOOP-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-OUTLOOP-NEXT: br label %[[FOR_END]] +; CHECK-OUTLOOP: [[FOR_END]]: +; CHECK-OUTLOOP-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ] +; CHECK-OUTLOOP-NEXT: ret i32 [[SELECT_LCSSA]] +; +; CHECK-INLOOP-LABEL: define i32 @select_icmp_switch( +; CHECK-INLOOP-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], i32 [[ANYOF:%.*]]) { +; CHECK-INLOOP-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-INLOOP-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK-INLOOP: [[FOR_BODY_PREHEADER]]: +; CHECK-INLOOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP: [[VECTOR_PH]]: +; CHECK-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CASE]], i64 0 +; CHECK-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-INLOOP-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1) +; CHECK-INLOOP-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-INLOOP-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP: [[VECTOR_BODY]]: +; CHECK-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]] +; CHECK-INLOOP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 +; CHECK-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-INLOOP-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 -1) +; CHECK-INLOOP-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true) +; CHECK-INLOOP-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer +; CHECK-INLOOP-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-INLOOP-NEXT: [[TMP8]] = or i1 [[TMP7]], [[VEC_PHI]] +; CHECK-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-INLOOP: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP8]] +; CHECK-INLOOP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP10]], i32 [[ANYOF]], i32 0 +; CHECK-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-INLOOP-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP: [[SCALAR_PH]]: +; CHECK-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-INLOOP-NEXT: br label %[[FOR_BODY:.*]] +; CHECK-INLOOP: [[FOR_BODY]]: +; CHECK-INLOOP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-INLOOP-NEXT: [[RDX_PHI:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ] +; CHECK-INLOOP-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]] +; CHECK-INLOOP-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1 +; CHECK-INLOOP-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1 +; CHECK-INLOOP-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [ +; CHECK-INLOOP-NEXT: i32 0, label %[[SW_BB0]] +; CHECK-INLOOP-NEXT: i32 1, label %[[SW_BB1:.*]] +; CHECK-INLOOP-NEXT: ] +; CHECK-INLOOP: [[SW_BB0]]: +; CHECK-INLOOP-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[ANYOF]] +; CHECK-INLOOP-NEXT: br label %[[FOR_INC]] +; CHECK-INLOOP: [[SW_BB1]]: +; CHECK-INLOOP-NEXT: br label %[[FOR_INC]] +; CHECK-INLOOP: [[FOR_INC]]: +; CHECK-INLOOP-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[RDX_PHI]], %[[SW_BB1]] ] +; CHECK-INLOOP-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 +; CHECK-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-INLOOP: [[FOR_END_LOOPEXIT]]: +; CHECK-INLOOP-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-NEXT: br label %[[FOR_END]] +; CHECK-INLOOP: [[FOR_END]]: +; CHECK-INLOOP-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ] +; CHECK-INLOOP-NEXT: ret i32 [[SELECT_LCSSA]] +; +entry: + %cmp.sgt = icmp sgt i32 %n, 0 + br i1 %cmp.sgt, label %for.body.preheader, label %for.end + +for.body.preheader: + %wide.trip.count = zext i32 %n to i64 + br label %for.body + +for.body: + %indvars = phi i64 [ 0, %for.body.preheader ], [ %indvars.next, %for.inc ] + %rdx.phi = phi i32 [ 0, %for.body.preheader ], [ %rdx.phi.next, %for.inc ] + %a.arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars + %a.value = load i8, ptr %a.arrayidx, align 1 + %cmp.a = icmp eq i8 %a.value, -1 + switch i32 %case, label %sw.bb0 [ + i32 0, label %sw.bb0 + i32 1, label %sw.bb1 + ] + +sw.bb0: + %select.bb0 = select i1 %cmp.a, i32 %rdx.phi, i32 %anyof + br label %for.inc + +sw.bb1: + br label %for.inc + +for.inc: + %rdx.phi.next = phi i32 [ %select.bb0, %sw.bb0 ], [ %rdx.phi, %sw.bb1 ] + %indvars.next = add nuw nsw i64 %indvars, 1 + %exitcond.not = icmp eq i64 %indvars.next, %wide.trip.count + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + %select.lcssa = phi i32 [ %rdx.phi.next, %for.inc ], [ 0, %entry ] + ret i32 %select.lcssa +} +;. +; CHECK-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-OUTLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. +; CHECK-INLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-INLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-INLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 576a971c5eaa8..c322cc325f606 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -2,6 +2,7 @@ ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC1 --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2 --check-prefix=CHECK ; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefix=CHECK-INLOOP-VF4-IC1 --check-prefix=CHECK ; int multi_user_cmp(float* a, long long n) { @@ -206,6 +207,63 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP18]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: middle.block: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INLOOP-VF4-IC1: scalar.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP11]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP12]] +; entry: br label %for.body @@ -430,6 +488,63 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP18]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_int( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: middle.block: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INLOOP-VF4-IC1: scalar.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP11]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP12]] +; entry: br label %for.body @@ -839,6 +954,118 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP27:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP26]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP27]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.memcheck: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = shl i64 [[N]], 2 +; CHECK-INLOOP-VF4-IC1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE8]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP33:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP33]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6]] = or i1 [[TMP5]], [[VEC_PHI]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8]] = or i1 [[TMP7]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-INLOOP-VF4-IC1: pred.store.if: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK-INLOOP-VF4-IC1: pred.store.continue: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK-INLOOP-VF4-IC1: pred.store.if3: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK-INLOOP-VF4-IC1: pred.store.continue4: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK-INLOOP-VF4-IC1: pred.store.if5: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK-INLOOP-VF4-IC1: pred.store.continue6: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] +; CHECK-INLOOP-VF4-IC1: pred.store.if7: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP24]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP27:%.*]] = add nsw i32 [[TMP26]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP27]], ptr [[TMP25]], align 4, !alias.scope [[META9]], !noalias [[META6]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK-INLOOP-VF4-IC1: pred.store.continue8: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: middle.block: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP29:%.*]] = freeze i1 [[TMP6]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP29]], i1 false, i1 true +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP30:%.*]] = freeze i1 [[TMP8]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP30]], i1 true, i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INLOOP-VF4-IC1: scalar.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ], [ true, [[VECTOR_MEMCHECK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ], [ false, [[VECTOR_MEMCHECK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX10]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]] +; CHECK-INLOOP-VF4-IC1: if.then3: +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[INC:%.*]] = add nsw i32 [[LOAD2]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX5]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[IF_END6]] +; CHECK-INLOOP-VF4-IC1: if.end6: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP31:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP32:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP31]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP32]] +; entry: br label %for.body @@ -1083,6 +1310,66 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no ; CHECK-VF1-IC2-NEXT: [[TMP19:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP17]], i32 [[TMP18]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP19]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: vector.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: middle.block: +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-INLOOP-VF4-IC1: scalar.ph: +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = zext i1 [[CMP1_LCSSA]] to i32 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP12]], i32 [[TMP13]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP14]] +; entry: br label %for.body @@ -1198,6 +1485,31 @@ define i32 @multi_user_cmp_fmax(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_fmax( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]] +; entry: br label %for.body @@ -1314,6 +1626,31 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_max( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]]) +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]] +; entry: br label %for.body @@ -1445,6 +1782,35 @@ define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i ; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_use_store_offset( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CONV4:%.*]] = zext i1 [[CMP1]] to i32 +; CHECK-INLOOP-VF4-IC1-NEXT: [[N32:%.*]] = trunc i64 [[N]] to i32 +; CHECK-INLOOP-VF4-IC1-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64 +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]] +; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]] +; entry: br label %for.body @@ -1550,6 +1916,31 @@ define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP3]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_no_vectorise( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = sext i1 [[CMP1]] to i64 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP3]] +; entry: br label %for.body @@ -1648,6 +2039,30 @@ define i32 @multi_user_cmp_extra_select(ptr readonly %a, i64 noundef %n) { ; CHECK-VF1-IC2-NEXT: [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]] ; CHECK-VF1-IC2-NEXT: ret i32 [[TMP2]] ; +; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_extra_select( +; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) { +; CHECK-INLOOP-VF4-IC1-NEXT: entry: +; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-INLOOP-VF4-IC1: for.body: +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00 +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false +; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK-INLOOP-VF4-IC1: exit: +; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ] +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3 +; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]] +; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP2]] +; entry: br label %for.body @@ -1719,5 +2134,21 @@ exit: ; CHECK-VF1-IC2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} ; CHECK-VF1-IC2: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]]} ;. +; CHECK-INLOOP-VF4-IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-INLOOP-VF4-IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-INLOOP-VF4-IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-INLOOP-VF4-IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4-IC1: [[META6]] = !{[[META7:![0-9]+]]} +; CHECK-INLOOP-VF4-IC1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; CHECK-INLOOP-VF4-IC1: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; CHECK-INLOOP-VF4-IC1: [[META9]] = !{[[META10:![0-9]+]]} +; CHECK-INLOOP-VF4-IC1: [[META10]] = distinct !{[[META10]], [[META8]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]} +;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 550e52d318230..124f18150d4a0 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -2,6 +2,9 @@ ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF1IC4 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF4IC1 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF4IC4 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF1IC4 define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) { ; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp( @@ -185,6 +188,190 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 7, i32 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 7, i32 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7 +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -376,6 +563,181 @@ define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 7, i32 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp2( +; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16]] = or i1 [[VEC_PHI]], [[TMP12]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17]] = or i1 [[VEC_PHI1]], [[TMP13]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18]] = or i1 [[VEC_PHI2]], [[TMP14]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19]] = or i1 [[VEC_PHI3]], [[TMP15]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP17]], [[TMP16]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP19]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP21]], i32 7, i32 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -576,6 +938,190 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_i32_from_icmp( +; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_i32_from_icmp( +; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_i32_from_icmp( +; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -776,6 +1322,190 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 1, i32 2 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast( +; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -976,6 +1706,190 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 1, i32 2 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]] +; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -1136,6 +2050,147 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] ; +; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-INLOOP-VF4IC1-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-INLOOP-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] +; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC1: [[LOOP]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-INLOOP-VF4IC1: [[EXIT]]: +; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-INLOOP-VF4IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-INLOOP-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI1]], [[TMP1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI2]], [[TMP1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI3]], [[TMP1]] +; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP3]], [[TMP2]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX4:%.*]] = or <4 x i1> [[TMP4]], [[BIN_RDX]] +; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP5]], [[BIN_RDX4]] +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]]) +; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF4IC4: [[LOOP]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-INLOOP-VF4IC4: [[EXIT]]: +; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]] +; +; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs( +; CHECK-INLOOP-VF1IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) { +; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2]] = or i1 [[VEC_PHI]], [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3]] = or i1 [[VEC_PHI1]], [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4]] = or i1 [[VEC_PHI2]], [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI3]], [[TMP1]] +; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP3]], [[TMP2]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP4]], [[BIN_RDX]] +; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP5]], [[BIN_RDX4]] +; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = freeze i1 [[BIN_RDX5]] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]] +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]] +; CHECK-INLOOP-VF1IC4: [[LOOP]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3 +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]] +; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-INLOOP-VF1IC4: [[EXIT]]: +; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]] +; entry: br label %loop @@ -1315,3 +2370,48 @@ exit: ; preds = %loop ; CHECK-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} ; CHECK-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} ;. +; CHECK-INLOOP-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-INLOOP-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-INLOOP-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-INLOOP-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. +; CHECK-INLOOP-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-INLOOP-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-INLOOP-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-INLOOP-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-INLOOP-VF4IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF4IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. +; CHECK-INLOOP-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-INLOOP-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-INLOOP-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-INLOOP-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK-INLOOP-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK-INLOOP-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK-INLOOP-VF1IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF1IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]} +; CHECK-INLOOP-VF1IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF1IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]} +; CHECK-INLOOP-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-INLOOP-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +;.