Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9362,13 +9362,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks,
// one exists.
TotalCost += calculateEarlyExitCost(CostCtx, Plan, VF.Width);

// If the expected trip count is less than the VF, the vector loop will only
// execute a single iteration. Then the middle block is executed the same
// number of times as the vector region.
// TODO: Extend logic to always account for the cost of the middle block.
auto ExpectedTC = getSmallBestKnownTC(PSE, L);
if (ExpectedTC && ElementCount::isKnownLE(*ExpectedTC, VF.Width))
TotalCost += Plan.getMiddleBlock()->cost(VF.Width, CostCtx);
TotalCost += Plan.getMiddleBlock()->cost(VF.Width, CostCtx);

// When interleaving only scalar and vector cost will be equal, which in turn
// would lead to a divide by 0. Fall back to hard threshold.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ define i64 @vectorization_not_profitable_due_to_trunc(ptr dereferenceable(800) %
; CHECK-NEXT: Calculating cost of work in exit block vector.early.exit:
; CHECK-NEXT: Cost of 1 for VF 1: EMIT vp<%first.active.lane> = first-active-lane ir<%t>
; CHECK-NEXT: Cost of 0 for VF 1: EMIT vp<%early.exit.value> = extract-lane vp<%first.active.lane>, ir<%l>
; CHECK-NEXT: LV: Vectorization is possible but not beneficial.
; CHECK: LV: Vectorization is possible but not beneficial.
entry:
br label %loop.header

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define void @no_outer_loop(ptr nocapture noundef %a, ptr nocapture noundef reado
; CHECK: Calculating cost of runtime checks:
; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop.
; CHECK: Total cost of runtime checks: 4
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %inner.loop

Expand All @@ -34,7 +34,7 @@ define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonl
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 3
; CHECK: Total cost of runtime checks: 3
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %outer.loop

Expand Down Expand Up @@ -71,7 +71,7 @@ define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef rea
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK: Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %outer.loop

Expand Down Expand Up @@ -108,7 +108,7 @@ define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef re
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 1
; CHECK: Total cost of runtime checks: 1
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %outer.loop

Expand Down Expand Up @@ -145,7 +145,7 @@ define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonl
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK: Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %outer.loop

Expand Down Expand Up @@ -182,7 +182,7 @@ define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef re
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 1
; CHECK: Total cost of runtime checks: 1
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
; CHECK: LV: Minimum required TC for runtime checks to be profitable:16
entry:
br label %outer.loop

Expand Down Expand Up @@ -219,7 +219,7 @@ define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr n
; CHECK: Calculating cost of runtime checks:
; CHECK: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced from 6 to 2
; CHECK: Total cost of runtime checks: 2
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:4
; CHECK: LV: Minimum required TC for runtime checks to be profitable:4
entry:
br label %outer.loop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[IDX]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP3]], i32 6)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], [[UMAX]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
Expand Down
Loading