6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4172,7 +4172,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
// Set of already visited types.
DenseSet<Type *> Visited;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
vp_depth_first_deep(Plan.getVectorPreheader()))) {
Contributor
Hm, intuitively this doesn't seem right to check anything outside the vector region

Contributor Author
My understanding was that the vector preheader is outside of the vector region? So this will traverse through vector.ph -> vector loop region -> middle.block -> scalar.ph -> etc.
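As an aside, a minimal sketch of the difference between the two traversals (illustrative only, not the patch itself; it assumes the snippet sits next to the internal VPlan headers in llvm/lib/Transforms/Vectorize, and the function name is made up):

#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanUtils.h"
using namespace llvm;

// Sketch only: which VPBasicBlocks each traversal visits.
static void visitBlocksSketch(VPlan &Plan) {
  // Old query: shallow walk from the vector loop region's entry, visiting
  // only blocks inside the region.
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())))
    (void)VPBB;

  // New query: deep walk from vector.ph, which descends into the vector loop
  // region and continues through middle.block, scalar.ph, and so on.
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorPreheader())))
    (void)VPBB;
}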

for (VPRecipeBase &R : *VPBB) {
if (EphemeralRecipes.contains(&R))
continue;
@@ -4192,6 +4192,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
case VPDef::VPBranchOnMaskSC:
case VPDef::VPIRInstructionSC:
continue;
case VPDef::VPReductionSC:
case VPDef::VPActiveLaneMaskPHISC:
@@ -4247,6 +4248,9 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
// operand.
VPValue *ToCheck =
R.getNumDefinedValues() >= 1 ? R.getVPValue(0) : R.getOperand(1);
// Don't consider values that didn't come from the original scalar IR.
if (!ToCheck->getUnderlyingValue())
Contributor
Use isLiveIn and getLiveInIRValue for clarity?

Contributor
I wonder if getLiveInIRValue is necessary: perhaps checking isLiveIn is sufficient?

Contributor Author
@lukel97 Jul 29, 2025
I think live-ins are different: they're just LLVM IR values plumbed directly into VPlan, like constants, loop-invariant values, etc.

IIUC getUnderlyingValue should return the original scalar IR value that a widened recipe came from, which might be loop variant.
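To illustrate the distinction being discussed, a minimal sketch under the same assumption that it lives beside the internal VPlan headers; the helper name is made up:

#include "VPlan.h"
#include "VPlanValue.h"
using namespace llvm;

// Sketch only: live-ins vs. underlying scalar IR values.
static void classifySketch(VPValue *V) {
  if (V->isLiveIn()) {
    // Live-ins are IR values plumbed directly into the VPlan (constants,
    // loop-invariant values, ...); getLiveInIRValue() returns that IR value.
    Value *IR = V->getLiveInIRValue();
    (void)IR;
  } else if (Value *Orig = V->getUnderlyingValue()) {
    // For a recipe-defined value, getUnderlyingValue() is the original scalar
    // instruction the recipe was widened from (possibly loop-variant); it is
    // null for values introduced purely by VPlan-to-VPlan transforms.
    (void)Orig;
  }
}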

continue;
Type *ScalarTy = TypeInfo.inferScalarType(ToCheck);
if (!Visited.insert({ScalarTy}).second)
continue;
109 changes: 55 additions & 54 deletions llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -737,12 +737,15 @@ exit:
ret void
}

define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
define void @exit_cond_zext_iv(ptr %dst, i64 %N) #0 {
; DEFAULT-LABEL: define void @exit_cond_zext_iv(
; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT: [[ENTRY:.*]]:
; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP7]], 8
; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], [[TMP8]]
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; DEFAULT: [[VECTOR_SCEVCHECK]]:
; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
@@ -754,18 +757,23 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; DEFAULT-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
; DEFAULT-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], [[TMP10]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 8
; DEFAULT-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
; DEFAULT-NEXT: store i32 0, ptr [[TMP9]], align 8
; DEFAULT-NEXT: store i32 0, ptr [[TMP10]], align 8
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP16]]
; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP14]], align 4
; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP17]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
@@ -778,8 +786,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; DEFAULT: [[LOOP]]:
; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 8
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 4
; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
; DEFAULT-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
; DEFAULT-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
Expand All @@ -788,63 +796,56 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; DEFAULT-NEXT: ret void
;
; PRED-LABEL: define void @exit_cond_zext_iv(
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; PRED-NEXT: [[ENTRY:.*]]:
; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
Contributor
Looks like @fhahn added this test to gain coverage of loops with casted indices. See 6084dcb. Presumably the reason we're not vectorising this loop any more is that for fixed-width vectors there are no vector operations (because the store is scalarised?) and for scalable vectors the cost is too high. It would be good if you could rewrite this test to avoid having to use scatter instructions in the loop, i.e. by modifying this

%gep = getelementptr {[100 x i32], i32, i32}, ptr %dst, i64 %iv.conv, i32 2

so that we're doing contiguous stores. Then you'll presumably need to add a new dedicated test to demonstrate the effect of the code change in this PR.

Contributor Author
Good point, I ended up also needing to add +sve to this test, because without it the masked load always gets scalarized. I've added a new dedicated test in llvm/test/Transforms/LoopVectorize/AArch64/no-vector-instructions.ll

; PRED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; PRED: [[VECTOR_SCEVCHECK]]:
; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
; PRED-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
; PRED-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]]
; PRED-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1
; PRED-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; PRED-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
; PRED-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; PRED-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; PRED-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]]
; PRED-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
; PRED-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; PRED-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
; PRED-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], [[TMP8]]
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; PRED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[UMAX1]], [[TMP12]]
; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[UMAX1]], [[TMP12]]
; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX1]])
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; PRED-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
; PRED-NEXT: [[TMP7:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT3]]
; PRED-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
; PRED-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
; PRED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
; PRED-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP9]], i32 2
; PRED-NEXT: store i32 0, ptr [[TMP10]], align 8
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
; PRED-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
; PRED-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
; PRED: [[PRED_STORE_IF4]]:
; PRED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
; PRED-NEXT: [[TMP13:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP12]], i32 2
; PRED-NEXT: store i32 0, ptr [[TMP13]], align 8
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; PRED: [[PRED_STORE_CONTINUE5]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; PRED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
; PRED-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[SCALAR_PH]]:
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; PRED-NEXT: br label %[[LOOP:.*]]
; PRED: [[LOOP]]:
; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
; PRED-NEXT: store i32 0, ptr [[GEP]], align 8
; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
; PRED-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
; PRED-NEXT: store i32 0, ptr [[GEP]], align 4
; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
@@ -858,8 +859,8 @@ entry:
loop:
%iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop ]
%iv.conv = phi i64 [ 0, %entry ], [ %iv.ext, %loop ]
%gep = getelementptr {[100 x i32], i32, i32}, ptr %dst, i64 %iv.conv, i32 2
store i32 0, ptr %gep, align 8
%gep = getelementptr i32, ptr %dst, i64 %iv.conv
store i32 0, ptr %gep, align 4
%iv.1.next = add i32 %iv.1, 1
%iv.ext = zext i32 %iv.1.next to i64
%c = icmp ult i64 %iv.ext, %N
32 changes: 32 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AArch64/no-vector-instructions.ll
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -p loop-vectorize -S -mtriple aarch64 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s

define void @f(ptr %p, i64 %x, i64 %n) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV1]]
; CHECK-NEXT: store i64 [[IV1]], ptr [[GEP1]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV1]], [[N]]
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%gep = getelementptr i64, ptr %p, i64 %iv
store i64 %iv, ptr %gep
%iv.next = add i64 %iv, 2
%done = icmp eq i64 %iv, %n
br i1 %done, label %exit, label %loop

exit:
ret void
}
@@ -7,47 +7,14 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
; CHECK-LABEL: define void @test_scalar_steps_target_instruction_cost(
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], splat (i64 8)
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]]
; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP5]]
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_CONTINUE2]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV]], 22
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -65,9 +32,3 @@ loop:
exit:
ret void
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.