From e23a33ac3b7e907182d2756387102a2cb5829662 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 24 Nov 2025 17:04:01 +0800 Subject: [PATCH 1/3] [VPlan] Set ZeroIsPoison=false whne lowering FirstActiveLane When interleaving a loop with an early exit, the parts before the active lane will be all zero. Currently we emit @llvm.experimental.cttz.elts with ZeroIsPoison=true for these parts, which means that they will produce poison. We don't see any miscompiles today on AArch64 because it has the same lowering for cttz.elts regardless of ZeroIsPoison, but this may cause issues on RISC-V when interleaving. This fixes it by setting ZeroIsPoison=false. The codegen is worse when ZeroIsPoison=false and we could potentially recover it by enabling it again when UF=1, but this is left to another PR. This is split off from #168738, where LastActiveLane can get expanded to a FirstActiveLane with an all-zeroes mask. --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 +-- .../AArch64/simple_early_exit.ll | 14 ++-- .../AArch64/single-early-exit-interleave.ll | 8 +-- .../single-early-exit-cond-poison.ll | 6 +- .../single-early-exit-deref-assumptions.ll | 14 ++-- .../single-early-exit-interleave.ll | 64 +++++++++---------- .../LoopVectorize/single_early_exit.ll | 4 +- .../single_early_exit_live_outs.ll | 48 +++++++------- ...or-loop-backedge-elimination-early-exit.ll | 8 +-- 9 files changed, 87 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7c9302860a3b5..779f82d1d4699 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1005,7 +1005,7 @@ Value *VPInstruction::generate(VPTransformState &State) { if (getNumOperands() == 1) { Value *Mask = State.get(getOperand(0)); return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask, - true, Name); + /*ZeroIsPoison=*/false, Name); } // If there are multiple operands, create a chain of selects to pick the // first operand with an active lane and add the number of lanes of the @@ -1021,9 +1021,9 @@ Value *VPInstruction::generate(VPTransformState &State) { Builder.CreateICmpEQ(State.get(getOperand(Idx)), Builder.getFalse()), Builder.getInt64Ty()) - : Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), - State.get(getOperand(Idx)), - true, Name); + : Builder.CreateCountTrailingZeroElems( + Builder.getInt64Ty(), State.get(getOperand(Idx)), + /*ZeroIsPoison=*/false, Name); Value *Current = Builder.CreateAdd( Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros); if (Res) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 3b016f8d0a9ff..63348ccf94f78 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -44,7 +44,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP16]], i1 false) ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -125,7 +125,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -187,7 +187,7 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -256,7 +256,7 @@ define i64 @loop_contains_safe_div() #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1( [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -336,7 +336,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -483,12 +483,12 @@ exit: define i64 @same_exit_block_requires_interleaving() { ; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [128 x %my.struct], align 8 +; CHECK-NEXT: [[P1:%.*]] = alloca [128 x [[MY_STRUCT:%.*]]], align 8 ; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 256) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x [[MY_STRUCT]]], ptr [[P1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 ; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll index b40a184a3e425..c56f8327a48b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll @@ -79,20 +79,20 @@ define i64 @same_exit_block_pre_inc_use1() #0 { ; CHECK: vector.early.exit: ; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 16 -; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 true) +; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP59]], i1 false) ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP40]], 3 ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP41]] -; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 true) +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP31]], i1 false) ; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], [[TMP44]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP44]], [[TMP40]] ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 [[TMP46]], i64 [[TMP43]] -; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 true) +; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP30]], i1 false) ; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP40]], 1 ; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], [[TMP49]] ; CHECK-NEXT: [[TMP52:%.*]] = icmp ne i64 [[TMP49]], [[TMP40]] ; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i64 [[TMP51]], i64 [[TMP48]] -; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1( [[TMP32]], i1 false) ; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP40]], 0 ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], [[TMP61]] ; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i64 [[TMP61]], [[TMP40]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index 794e274a2628c..f11f35319b8fc 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -31,9 +31,9 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2: [[MIDDLE_BLOCK]]: ; VF4IC2-NEXT: br label %[[RETURN:.*]] ; VF4IC2: [[VECTOR_EARLY_EXIT]]: -; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; VF4IC2-NEXT: [[TMP10:%.*]] = add i64 4, [[TMP9]] -; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; VF4IC2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] ; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4 ; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]] @@ -64,7 +64,7 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1: [[MIDDLE_BLOCK]]: ; VF8IC1-NEXT: br label %[[RETURN:.*]] ; VF8IC1: [[VECTOR_EARLY_EXIT]]: -; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]] ; VF8IC1-NEXT: br label %[[RETURN]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index 03b7ed7fe2135..0bc2748b6252d 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -28,7 +28,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -140,7 +140,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero(ptr n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END_LOOPEXIT]] ; CHECK: [[SCALAR_PH]]: @@ -336,7 +336,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] @@ -431,7 +431,7 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -525,7 +525,7 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] @@ -602,7 +602,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[LOOP_END:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]] ; CHECK-NEXT: br label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: @@ -740,7 +740,7 @@ define i64 @find_if_pointer_distance_deref_via_assumption(ptr %vec) nofree nosyn ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[BEGIN]], i64 [[TMP13]] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index ed5dcc78eeb78..053863117bdc8 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -124,17 +124,17 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -211,17 +211,17 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP16:%.*]] = add i64 12, [[TMP15]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP28]], i1 false) ; VF4IC4-NEXT: [[TMP18:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 [[TMP16]] -; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 true) +; VF4IC4-NEXT: [[TMP21:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 false) ; VF4IC4-NEXT: [[TMP22:%.*]] = add i64 4, [[TMP21]] ; VF4IC4-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP21]], 4 ; VF4IC4-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i64 [[TMP22]], i64 [[TMP20]] -; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP26:%.*]] = add i64 0, [[TMP25]] ; VF4IC4-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP25]], 4 ; VF4IC4-NEXT: [[TMP6:%.*]] = select i1 [[TMP27]], i64 [[TMP26]], i64 [[TMP24]] @@ -304,17 +304,17 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -401,17 +401,17 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -503,17 +503,17 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 true) +; VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 12, [[TMP20]] -; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP23:%.*]] = add i64 8, [[TMP22]] ; VF4IC4-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP22]], 4 ; VF4IC4-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[TMP23]], i64 [[TMP21]] -; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 4, [[TMP26]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP26]], 4 ; VF4IC4-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP25]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 0, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] @@ -623,17 +623,17 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 true) +; VF4IC4-NEXT: [[TMP28:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP43]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 12, [[TMP28]] -; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP31:%.*]] = add i64 8, [[TMP30]] ; VF4IC4-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP30]], 4 ; VF4IC4-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] -; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[TMP34:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 4, [[TMP34]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP34]], 4 ; VF4IC4-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP33]] -; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[TMP38:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP39:%.*]] = add i64 0, [[TMP38]] ; VF4IC4-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP38]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP40]], i64 [[TMP39]], i64 [[TMP37]] @@ -734,17 +734,17 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[LOOP_END:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; VF4IC4-NEXT: [[TMP20:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; VF4IC4-NEXT: [[TMP21:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE8]] ; VF4IC4-NEXT: [[TMP22:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE8]], 4 ; VF4IC4-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i64 [[TMP21]], i64 [[TMP20]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP29]], i1 false) ; VF4IC4-NEXT: [[TMP24:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE9]] ; VF4IC4-NEXT: [[TMP25:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE9]], 4 ; VF4IC4-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i64 [[TMP24]], i64 [[TMP23]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) ; VF4IC4-NEXT: [[TMP27:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP28:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP28]], i64 [[TMP27]], i64 [[TMP26]] @@ -861,17 +861,17 @@ define i8 @same_exit_block_reverse_use_loaded_value() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP37]], i1 false) ; VF4IC4-NEXT: [[TMP28:%.*]] = add i64 12, [[FIRST_ACTIVE_LANE]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP20]], i1 false) ; VF4IC4-NEXT: [[TMP29:%.*]] = add i64 8, [[FIRST_ACTIVE_LANE15]] ; VF4IC4-NEXT: [[TMP30:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE15]], 4 ; VF4IC4-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i64 [[TMP29]], i64 [[TMP28]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE16:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; VF4IC4-NEXT: [[TMP32:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE16]] ; VF4IC4-NEXT: [[TMP33:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE16]], 4 ; VF4IC4-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i64 [[TMP32]], i64 [[TMP31]] -; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 true) +; VF4IC4-NEXT: [[FIRST_ACTIVE_LANE1:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP21]], i1 false) ; VF4IC4-NEXT: [[TMP35:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE1]] ; VF4IC4-NEXT: [[TMP36:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE1]], 4 ; VF4IC4-NEXT: [[TMP10:%.*]] = select i1 [[TMP36]], i64 [[TMP35]], i64 [[TMP34]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 4fd8d17073de4..ae03f2426a800 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -424,7 +424,7 @@ define i64 @loop_guard_needed_to_prove_dereferenceable(i32 %x, i1 %cmp2) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: @@ -572,7 +572,7 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 79821b8be1734..55682bc410527 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -32,7 +32,7 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -96,7 +96,7 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[DOTCAST]], 2 @@ -160,7 +160,7 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[FIRST_ACTIVE_LANE]] to i128 ; CHECK-NEXT: [[TMP9:%.*]] = add i128 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i128 [[TMP9]] to i32 @@ -226,7 +226,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]] @@ -294,7 +294,7 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 5 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]] @@ -357,7 +357,7 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -420,7 +420,7 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -485,7 +485,7 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -549,7 +549,7 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -674,7 +674,7 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -739,7 +739,7 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -801,7 +801,7 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -861,7 +861,7 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]] @@ -922,7 +922,7 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] @@ -987,7 +987,7 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1122,7 +1122,7 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1189,7 +1189,7 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -1258,7 +1258,7 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]] @@ -1330,7 +1330,7 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]] @@ -1401,7 +1401,7 @@ define i64 @loop_contains_safe_call() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1463,7 +1463,7 @@ define i64 @loop_contains_safe_div() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1526,7 +1526,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] @@ -1594,7 +1594,7 @@ define i64 @same_exit_block_pre_inc_use1_reverse() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]] ; CHECK-NEXT: br label [[LOOP_END:%.*]] @@ -1719,7 +1719,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) +; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 8da1dca52e87b..ef4d5c6d66700 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -127,7 +127,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[VECTOR_EARLY_EXIT]]: -; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) +; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 false) ; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; VF8UF1-NEXT: br label %[[EXIT]] ; VF8UF1: [[EXIT]]: @@ -156,9 +156,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: -; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true) +; VF8UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false) ; VF8UF2-NEXT: [[TMP7:%.*]] = add i64 8, [[TMP5]] -; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 true) +; VF8UF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP1]], i1 false) ; VF8UF2-NEXT: [[TMP9:%.*]] = add i64 0, [[TMP8]] ; VF8UF2-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], 8 ; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]] @@ -185,7 +185,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: -; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true) +; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 false) ; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[EXIT]]: From 27e371d2b590bbf9aed0282a257ddd1ad61ec52a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 24 Nov 2025 17:58:40 +0800 Subject: [PATCH 2/3] Update phaseordering tests --- .../PhaseOrdering/AArch64/std-find.ll | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index aea9a80ba6dd0..a727973b43511 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -28,7 +28,7 @@ define i64 @std_find_i16_constant_offset_with_assumptions(ptr %first.coerce, i16 ; CHECK: [[MIDDLE_SPLIT]]: ; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[RETURN:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP0]], i1 false) ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[FIRST_COERCE]], i64 [[TMP7]] @@ -149,13 +149,14 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_HEADER_I_PREHEADER2:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], -8 -; CHECK: [[TMP9:%.*]] = getelementptr -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[XTRAITER]], 1 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[PROL_ITER_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 +; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <8 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD_FR]], splat (i16 1) ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -170,10 +171,10 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[XTRAITER]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[STD_FIND_GENERIC_IMPL_EXIT]], label %[[LOOP_HEADER_I_PREHEADER2]] ; CHECK: [[LOOP_HEADER_I_PREHEADER2]]: -; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[PTR_IV_I_PH:%.*]] = phi ptr [ [[FIRST]], %[[LOOP_HEADER_I_PREHEADER]] ], [ [[NEXT_GEP]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER_I:.*]] ; CHECK: [[VECTOR_EARLY_EXIT]]: -; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP4]], i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] From 2631daa0bfdef6ebddaa1df49c7998db35e56358 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 24 Nov 2025 22:11:16 +0800 Subject: [PATCH 3/3] Update comment --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13582f8bd2d62..c3aa2a6c4e6b9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1096,6 +1096,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, // Calculates the first active lane index of the vector predicate operands. // It produces the lane index across all unrolled iterations. Unrolling will // add all copies of its original operand as additional operands. + // Implemented with @llvm.experimental.cttz.elts, but returns the expected + // result even with operands that are all zeroes. FirstActiveLane, // The opcodes below are used for VPInstructionWithType.