-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LoopVectorize] Fix divide-by-zero bug (#80836) #81721
Conversation
@llvm/pr-subscribers-llvm-transforms Author: David Sherwood (david-arm) ChangesWhen attempting to use the estimated trip count to refine the costs of the runtime memory checks we should also check for sane trip counts to prevent divide-by-zero faults on some platforms. Full diff: https://github.com/llvm/llvm-project/pull/81721.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1a7b301c35f2b8..26065865cb420c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2106,18 +2106,20 @@ class GeneratedRTChecks {
BestTripCount = *EstimatedTC;
}
- InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
+ if (BestTripCount > 1) {
+ InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
- // Let's ensure the cost is always at least 1.
- NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
- (InstructionCost::CostType)1);
+ // Let's ensure the cost is always at least 1.
+ NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
+ (InstructionCost::CostType)1);
- LLVM_DEBUG(dbgs()
- << "We expect runtime memory checks to be hoisted "
- << "out of the outer loop. Cost reduced from "
- << MemCheckCost << " to " << NewMemCheckCost << '\n');
+ LLVM_DEBUG(dbgs()
+ << "We expect runtime memory checks to be hoisted "
+ << "out of the outer loop. Cost reduced from "
+ << MemCheckCost << " to " << NewMemCheckCost << '\n');
- MemCheckCost = NewMemCheckCost;
+ MemCheckCost = NewMemCheckCost;
+ }
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
index 8a796bb3065b19..800c55d6740bc8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
@@ -177,6 +177,43 @@ outer.exit:
}
+define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
+; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1'
+; CHECK: Calculating cost of runtime checks:
+; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced
+; CHECK: Total cost of runtime checks: 6
+; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
+entry:
+ br label %outer.loop
+
+outer.loop:
+ %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
+ %mul.us = mul nsw i64 %outer.iv, %n
+ br label %inner.loop
+
+inner.loop:
+ %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
+ %add.us = add nuw nsw i64 %inner.iv, %mul.us
+ %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
+ %0 = load i8, ptr %arrayidx.us, align 1
+ %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
+ %1 = load i8, ptr %arrayidx7.us, align 1
+ %add9.us = add i8 %1, %0
+ store i8 %add9.us, ptr %arrayidx7.us, align 1
+ %inner.iv.next = add nuw nsw i64 %inner.iv, 1
+ %exitcond.not = icmp eq i64 %inner.iv.next, %n
+ br i1 %exitcond.not, label %inner.exit, label %inner.loop
+
+inner.exit:
+ %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+ %exitcond26.not = icmp eq i64 %outer.iv.next, %m
+ br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !1
+
+outer.exit:
+ ret void
+}
+
+
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
; CHECK: Calculating cost of runtime checks:
@@ -215,3 +252,4 @@ outer.exit:
!0 = !{!"branch_weights", i32 10, i32 20}
+!1 = !{!"branch_weights", i32 1, i32 -1}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please add Fixes #80836
to the description of the PR so the issue will get auto-closed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
When attempting to use the estimated trip count to refine the costs of the runtime memory checks we should also check for sane trip counts to prevent divide-by-zero faults on some platforms.
When attempting to use the estimated trip count to refine the costs of the runtime memory checks we should also check for sane trip counts to prevent divide-by-zero faults on some platforms.
Fixes #80836