-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] limit icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1 to hasOneUse #74318
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Liao Chunyu (ChunyuLiao) ChangesFix: #30121 Testcase ir: https://gcc.godbolt.org/z/oKTPE7v48
If add the limitations, @llvm.smax shouldn't be needed.
If we need icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1, perhaps we can implement this at DAG combine. Full diff: https://github.com/llvm/llvm-project/pull/74318.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1d09d9b44a9e5..5dafd0dcd5ef5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2231,7 +2231,8 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
//
// NB: sge/sle with a constant will canonicalize to sgt/slt.
if (Shl->hasNoSignedWrap() &&
- (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
+ ((Pred == ICmpInst::ICMP_SGT && Shl->hasOneUse()) ||
+ Pred == ICmpInst::ICMP_SLT))
if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
@@ -2251,7 +2252,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
// NSW guarantees that we are only shifting out sign bits from the high bits,
// so we can ASHR the compare constant without needing a mask and eliminate
// the shift.
- if (Shl->hasNoSignedWrap()) {
+ if (Shl->hasNoSignedWrap() && Shl->hasOneUse()) {
if (Pred == ICmpInst::ICMP_SGT) {
// icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
APInt ShiftedC = C.ashr(*ShiftAmt);
diff --git a/llvm/test/Transforms/IndVarSimplify/pr30121.ll b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
new file mode 100644
index 0000000000000..c69971d46e98f
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr30121.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes='instcombine,indvars' < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+
+define void @g(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[N:%.*]], 4
+; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[MUL]], 0
+; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[MUL]] to i64
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = shl nsw i32 [[TMP0]], 1
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
+;
+entry:
+ %mul = shl nsw i32 %n, 4
+ %cmp11 = icmp sgt i32 %mul, 0
+ br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.012 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = zext nneg i32 %i.012 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = shl nsw i32 %0, 1
+ store i32 %add, ptr %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.012, 1
+ %cmp = icmp slt i32 %inc, %mul
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
|
I don't think this is the right way to fix this issue. If I'm reading the transformation log right, the problem seems to be that we lose nsw information in IndVarSimplify? |
Could you help give more information? Thanks. Violent do try. Keep shl's nsw, but it doesn't fix the issue.
|
I guess I found where SCEV takes effect against this patch in:
There are some optimizations for SCEVAddRecExpr and SCEVAddExpr in computeConstantDifference, but in this example it's: SCEVType = llvm::scUnknown, so it can't be handled |
…> C1 to hasOneUse Fix: llvm#30121 Testcase ir: https://gcc.godbolt.org/z/oKTPE7v48 ``` %cmp11 = icmp sgt i32 %n, 0 br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry %mul = shl i32 %n, 4 %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1) %wide.trip.count = zext nneg i32 %smax to i64 br label %for.body ``` If add the limitations, @llvm.smax shouldn't be needed. ``` %mul = shl nsw i32 %n, 4 %cmp11 = icmp sgt i32 %mul, 0 br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry %smax = tail call i32 @llvm.smax.i32(i32 %mul, i32 1) --- this shouldn't be needed ``` If we need icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1, perhaps we can implement this at DAG combine.
Fix: #30121
Testcase ir: https://gcc.godbolt.org/z/oKTPE7v48
If add the limitations, @llvm.smax shouldn't be needed.
If we need icmp sgt (shl nsw X, C1), C0 --> icmp sgt X, C0 >> C1, perhaps we can implement this at DAG combine.