Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
// Calculates the first active lane index of the vector predicate operands.
// It produces the lane index across all unrolled iterations. Unrolling will
// add all copies of its original operand as additional operands.
// Implemented with @llvm.experimental.cttz.elts with ZeroIsPoison=false, so
// any of the operands may be all zeroes: for such an operand the intrinsic
// returns the operand's number of elements instead of poison.
FirstActiveLane,

// The opcodes below are used for VPInstructionWithType.
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (getNumOperands() == 1) {
Value *Mask = State.get(getOperand(0));
return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
true, Name);
/*ZeroIsPoison=*/false, Name);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you extend the documentation for FirstActiveLane to state explicitly that any of the operands can be all zeros?

}
// If there are multiple operands, create a chain of selects to pick the
// first operand with an active lane and add the number of lanes of the
Expand All @@ -1021,9 +1021,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.CreateICmpEQ(State.get(getOperand(Idx)),
Builder.getFalse()),
Builder.getInt64Ty())
: Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(),
State.get(getOperand(Idx)),
true, Name);
: Builder.CreateCountTrailingZeroElems(
Builder.getInt64Ty(), State.get(getOperand(Idx)),
/*ZeroIsPoison=*/false, Name);
Value *Current = Builder.CreateAdd(
Builder.CreateMul(RuntimeVF, Builder.getInt64(Idx)), TrailingZeros);
if (Res) {
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 true)
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 false)
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]]
; CHECK-NEXT: br label [[LOOP_END]]
Expand Down Expand Up @@ -125,7 +125,7 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP_END:%.*]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true)
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]]
; CHECK-NEXT: br label [[LOOP_END]]
Expand Down Expand Up @@ -187,7 +187,7 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP_END:%.*]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]]
; CHECK-NEXT: br label [[LOOP_END]]
Expand Down Expand Up @@ -256,7 +256,7 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH:%.*]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 false)
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]]
; CHECK-NEXT: br label [[LOOP_END]]
Expand Down Expand Up @@ -336,7 +336,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK: middle.block:
; CHECK-NEXT: br label [[LOOP_END:%.*]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
Expand Down Expand Up @@ -483,12 +483,12 @@ exit:
define i64 @same_exit_block_requires_interleaving() {
; CHECK-LABEL: define i64 @same_exit_block_requires_interleaving() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P1:%.*]] = alloca [128 x %my.struct], align 8
; CHECK-NEXT: [[P1:%.*]] = alloca [128 x [[MY_STRUCT:%.*]]], align 8
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 256)
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x %my.struct], ptr [[P1]], i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [128 x [[MY_STRUCT]]], ptr [[P1]], i64 0, i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_LATCH]], label [[LOOP_END:%.*]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,20 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK: vector.early.exit:
; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP39]], 16
; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP59]], i1 true)
; CHECK-NEXT: [[TMP41:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP59]], i1 false)
; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[TMP40]], 3
; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[TMP42]], [[TMP41]]
; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP31]], i1 true)
; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP31]], i1 false)
; CHECK-NEXT: [[TMP45:%.*]] = mul i64 [[TMP40]], 2
; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], [[TMP44]]
; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP44]], [[TMP40]]
; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 [[TMP46]], i64 [[TMP43]]
; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP30]], i1 true)
; CHECK-NEXT: [[TMP49:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP30]], i1 false)
; CHECK-NEXT: [[TMP50:%.*]] = mul i64 [[TMP40]], 1
; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = icmp ne i64 [[TMP49]], [[TMP40]]
; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i64 [[TMP51]], i64 [[TMP48]]
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 false)
; CHECK-NEXT: [[TMP55:%.*]] = mul i64 [[TMP40]], 0
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], [[TMP61]]
; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i64 [[TMP61]], [[TMP40]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ define noundef i32 @f(i32 noundef %g) {
; VF4IC2: [[MIDDLE_BLOCK]]:
; VF4IC2-NEXT: br label %[[RETURN:.*]]
; VF4IC2: [[VECTOR_EARLY_EXIT]]:
; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
; VF4IC2-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
; VF4IC2-NEXT: [[TMP10:%.*]] = add i64 4, [[TMP9]]
; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
; VF4IC2-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; VF4IC2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]]
; VF4IC2-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4
; VF4IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]]
Expand Down Expand Up @@ -64,7 +64,7 @@ define noundef i32 @f(i32 noundef %g) {
; VF8IC1: [[MIDDLE_BLOCK]]:
; VF8IC1-NEXT: br label %[[RETURN:.*]]
; VF8IC1: [[VECTOR_EARLY_EXIT]]:
; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true)
; VF8IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 false)
; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]]
; VF8IC1-NEXT: br label %[[RETURN]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
Expand Down Expand Up @@ -140,7 +140,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero(ptr n
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]]
; CHECK-NEXT: br label %[[LOOP_END_LOOPEXIT]]
; CHECK: [[SCALAR_PH]]:
Expand Down Expand Up @@ -336,7 +336,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]]
Expand Down Expand Up @@ -431,7 +431,7 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
Expand Down Expand Up @@ -525,7 +525,7 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
Expand Down Expand Up @@ -602,7 +602,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[TMP7]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
Expand Down Expand Up @@ -740,7 +740,7 @@ define i64 @find_if_pointer_distance_deref_via_assumption(ptr %vec) nofree nosyn
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[BEGIN]], i64 [[TMP13]]
Expand Down
Loading
Loading