diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13582f8bd2d62..c3aa2a6c4e6b9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1096,6 +1096,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, // Calculates the first active lane index of the vector predicate operands. // It produces the lane index across all unrolled iterations. Unrolling will // add all copies of its original operand as additional operands. + // Implemented with @llvm.experimental.cttz.elts, but returns the expected + // result even with operands that are all zeroes. FirstActiveLane, // The opcodes below are used for VPInstructionWithType. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7c9302860a3b5..779f82d1d4699 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1005,7 +1005,7 @@ Value *VPInstruction::generate(VPTransformState &State) { if (getNumOperands() == 1) { Value *Mask = State.get(getOperand(0)); return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, - true, Name); + /*ZeroIsPoison=*/false, Name); } // If there are multiple operands, create a chain of selects to pick the // first operand with an active lane and add the number of lanes of the @@ -1021,9 +1021,9 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.CreateICmpEQ(State.get(getOperand(Idx)), Builder.getFalse()), Builder.getInt64Ty()) - : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), - State.get(getOperand(Idx)), - true, Name); + : Builder.CreateCountTrailingZeroElems( + Builder.getInt64Ty(), State.get(getOperand(Idx)), + /*ZeroIsPoison=*/false, Name); Value *Current = Builder.CreateAdd( Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros); if (Res) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 3b016f8d0a9ff..63348ccf94f78 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -44,7 +44,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 false) ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -125,7 +125,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -187,7 +187,7 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -256,7 +256,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -336,7 +336,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -483,12 +483,12 @@ exit: define i64 @same_exit_block_requires_interleaving() { ; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [128 x %my.struct], align 8 +; CHECK-NEXT: [[P1:%.*]] = alloca [128 x [[MY_STRUCT:%.*]]], align 8 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 256) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x [[MY_STRUCT]]], ptr [[P1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 ; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index b40a184a3e425..c56f8327a48b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -79,20 +79,20 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK: vector.early.exit: ; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 16 -; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 true) +; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 false) ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP40]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP41]] -; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 false) ; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], [[TMP44]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP44]], [[TMP40]] ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 [[TMP46]], i64 [[TMP43]] -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 true) +; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 false) ; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP40]], 1 ; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = icmp ne i64 [[TMP49]], [[TMP40]] ; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i64 [[TMP51]], i64 [[TMP48]] -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 false) ; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP40]], 0 ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], [[TMP61]] ; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i64 [[TMP61]], [[TMP40]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index 794e274a2628c..f11f35319b8fc 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -31,9 +31,9 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2: [[MIDDLE_BLOCK]]: ; VF4IC2-NEXT: br label %[[RETURN:.*]] ; VF4IC2: [[VECTOR_EARLY_EXIT]]: -; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; VF4IC2-NEXT: [[TMP10:%.*]] = add i64 4, [[TMP9]] -; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; VF4IC2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] ; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4 ; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]] @@ -64,7 +64,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1: [[MIDDLE_BLOCK]]: ; VF8IC1-NEXT: br label %[[RETURN:.*]] ; VF8IC1: [[VECTOR_EARLY_EXIT]]: -; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]] ; VF8IC1-NEXT: br label %[[RETURN]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index 03b7ed7fe2135..0bc2748b6252d 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -28,7 +28,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -140,7 +140,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero(ptr n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END_LOOPEXIT]] ; CHECK: [[SCALAR_PH]]: @@ -336,7 +336,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] @@ -431,7 +431,7 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -525,7 +525,7 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -602,7 +602,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -740,7 +740,7 @@ define i64 @find_if_pointer_distance_deref_via_assumption(ptr %vec) nofree nosyn ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[BEGIN]], i64 [[TMP13]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index ed5dcc78eeb78..053863117bdc8 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -124,17 +124,17 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -211,17 +211,17 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP16:%.*]] = add i64 12, [[TMP15]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 false) ; VF4IC4-NEXT: [[TMP18:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 [[TMP16]] -; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 true) +; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 false) ; VF4IC4-NEXT: [[TMP22:%.*]] = add i64 4, [[TMP21]] ; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP21]], 4 ; VF4IC4-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i64 [[TMP22]], i64 [[TMP20]] -; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]] ; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP25]], 4 ; VF4IC4-NEXT: [[TMP6:%.*]] = select i1 [[TMP27]], i64 [[TMP26]], i64 [[TMP24]] @@ -304,17 +304,17 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -401,17 +401,17 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -503,17 +503,17 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -623,17 +623,17 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 true) +; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 12, [[TMP28]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] -; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 4, [[TMP34]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP34]], 4 ; VF4IC4-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP33]] -; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP39:%.*]] = add i64 0, [[TMP38]] ; VF4IC4-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP38]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP40]], i64 [[TMP39]], i64 [[TMP37]] @@ -734,17 +734,17 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP20:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE8]] ; VF4IC4-NEXT: [[TMP22:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE8]], 4 ; VF4IC4-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i64 [[TMP21]], i64 [[TMP20]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE9]] ; VF4IC4-NEXT: [[TMP25:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE9]], 4 ; VF4IC4-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i64 [[TMP24]], i64 [[TMP23]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP26]] @@ -861,17 +861,17 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 false) ; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE15]] ; VF4IC4-NEXT: [[TMP30:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE15]], 4 ; VF4IC4-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i64 [[TMP29]], i64 [[TMP28]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE16]] ; VF4IC4-NEXT: [[TMP33:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE16]], 4 ; VF4IC4-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i64 [[TMP32]], i64 [[TMP31]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP34]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 4fd8d17073de4..ae03f2426a800 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -424,7 +424,7 @@ define i64 @loop_guard_needed_to_prove_dereferenceable(i32 %x, i1 %cmp2) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: @@ -572,7 +572,7 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 79821b8be1734..55682bc410527 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -32,7 +32,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -96,7 +96,7 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[DOTCAST]], 2 @@ -160,7 +160,7 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[FIRST_ACTIVE_LANE]] to i128 ; CHECK-NEXT: [[TMP9:%.*]] = add i128 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i128 [[TMP9]] to i32 @@ -226,7 +226,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]] @@ -294,7 +294,7 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 5 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]] @@ -357,7 +357,7 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -420,7 +420,7 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -485,7 +485,7 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -549,7 +549,7 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -674,7 +674,7 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -739,7 +739,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -801,7 +801,7 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -861,7 +861,7 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]] @@ -922,7 +922,7 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] @@ -987,7 +987,7 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1122,7 +1122,7 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1189,7 +1189,7 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1258,7 +1258,7 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]] @@ -1330,7 +1330,7 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]] @@ -1401,7 +1401,7 @@ define i64 @loop_contains_safe_call() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1463,7 +1463,7 @@ define i64 @loop_contains_safe_div() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1526,7 +1526,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1594,7 +1594,7 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]] ; CHECK-NEXT: br label [[LOOP_END:%.*]] @@ -1719,7 +1719,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 8da1dca52e87b..ef4d5c6d66700 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -127,7 +127,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[VECTOR_EARLY_EXIT]]: -; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) +; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 false) ; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; VF8UF1-NEXT: br label %[[EXIT]] ; VF8UF1: [[EXIT]]: @@ -156,9 +156,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: -; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]] -; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 true) +; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 false) ; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]] ; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8 ; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]] @@ -185,7 +185,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: -; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true) +; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false) ; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[EXIT]]: diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index aea9a80ba6dd0..a727973b43511 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -28,7 +28,7 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 ; CHECK: [[MIDDLE_SPLIT]]: ; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 false) ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[TMP7]] @@ -149,13 +149,14 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8 -; CHECK: [[TMP9:%.*]] = getelementptr -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[XTRAITER]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1) ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -170,10 +171,10 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]] ; CHECK: [[LOOP_HEADER_I_PREHEADER2]]: -; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[NEXT_GEP]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]