-
Notifications
You must be signed in to change notification settings - Fork 11.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CVP] Infer nneg on existing zext #72052
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch infers llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp Lines 74 to 83 in 40671bb
This is an alternative to #72049. Full diff: https://github.com/llvm/llvm-project/pull/72052.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ece22428e3cbdce..eb63c56a451d108 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -93,6 +93,7 @@ STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
STATISTIC(NumUDivURemsNarrowedExpanded,
"Number of bound udiv's/urem's expanded");
+STATISTIC(NumZExt, "Number of non-negative deductions");
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
@@ -1032,6 +1033,24 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
return true;
}
+static bool processZExt(ZExtInst *SDI, LazyValueInfo *LVI) {
+ if (SDI->getType()->isVectorTy())
+ return false;
+
+ if (SDI->hasNonNeg())
+ return false;
+
+ const Use &Base = SDI->getOperandUse(0);
+ if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
+ .isAllNonNegative())
+ return false;
+
+ ++NumZExt;
+ SDI->setNonNeg();
+
+ return true;
+}
+
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
@@ -1162,6 +1181,9 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
case Instruction::SExt:
BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
break;
+ case Instruction::ZExt:
+ BBChanged |= processZExt(cast<ZExtInst>(&II), LVI);
+ break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll b/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll
new file mode 100644
index 000000000000000..fcf6177127de5f5
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/zext.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
+
+; Check that debug locations are preserved. For more info see:
+; https://llvm.org/docs/SourceLevelDebugging.html#fixing-errors
+; RUN: opt < %s -enable-debugify -passes=correlated-propagation -S 2>&1 | \
+; RUN: FileCheck %s -check-prefix=DEBUG
+; DEBUG: CheckModuleDebugify: PASS
+
+declare void @use64(i64)
+
+define void @test1(i32 %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -1
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[A]] to i64
+; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
+; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
+ %cmp = icmp sgt i32 %a, -1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %ext.wide = zext i32 %a to i64
+ call void @use64(i64 %ext.wide)
+ %ext = trunc i64 %ext.wide to i32
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+;; Negative test to show transform doesn't happen unless n >= 0.
+define void @test2(i32 %n) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -2
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
+; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
+ %cmp = icmp sgt i32 %a, -2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %ext.wide = zext i32 %a to i64
+ call void @use64(i64 %ext.wide)
+ %ext = trunc i64 %ext.wide to i32
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+;; Non looping test case.
+define void @test3(i32 %n) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1
+; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
+; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %n, -1
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+ %ext.wide = zext i32 %n to i64
+ call void @use64(i64 %ext.wide)
+ %ext = trunc i64 %ext.wide to i32
+ br label %exit
+
+exit:
+ ret void
+}
+
+;; Non looping negative test case.
+define void @test4(i32 %n) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2
+; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
+; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp sgt i32 %n, -2
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+ %ext.wide = zext i32 %n to i64
+ call void @use64(i64 %ext.wide)
+ %ext = trunc i64 %ext.wide to i32
+ br label %exit
+
+exit:
+ ret void
+}
+
+define i64 @may_including_undef(i1 %c.1, i1 %c.2) {
+; CHECK-LABEL: @may_including_undef(
+; CHECK-NEXT: br i1 [[C_1:%.*]], label [[TRUE_1:%.*]], label [[FALSE:%.*]]
+; CHECK: true.1:
+; CHECK-NEXT: br i1 [[C_2:%.*]], label [[TRUE_2:%.*]], label [[EXIT:%.*]]
+; CHECK: true.2:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: false:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ undef, [[FALSE]] ]
+; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[P]] to i64
+; CHECK-NEXT: ret i64 [[EXT]]
+;
+ br i1 %c.1, label %true.1, label %false
+
+true.1:
+ br i1 %c.2, label %true.2, label %exit
+
+true.2:
+ br label %exit
+
+false:
+ br label %exit
+
+exit:
+ %p = phi i32 [ 0, %true.1 ], [ 1, %true.2], [ undef, %false ]
+ %ext = zext i32 %p to i64
+ ret i64 %ext
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 2c78b7208c19ff2..baeaef0b67b041c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -20,11 +20,11 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; AUTO_VEC: for.body.preheader:
-; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 32
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; AUTO_VEC: vector.ph:
-; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 4294967264
+; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 2147483616
; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
@@ -103,12 +103,12 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; AUTO_VEC: for.body.preheader:
-; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64
; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[ZEXT]], 7
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 8
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; AUTO_VEC: for.body.preheader.new:
-; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288
+; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]
; AUTO_VEC: for.body:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
index 80f96b17c9083e4..4ff67b56016118d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
@@ -171,11 +171,11 @@ define void @test_runtime_trip_count(i32 %N) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: for.body.preheader:
-; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
index e169f2570cd0e5b..40ea616331d46ea 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
@@ -24,11 +24,11 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[S:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
-; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[S]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967288
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 0fbbafca696c827..246bb0095e1a258 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -26,7 +26,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
|
895c74b
to
05aacbb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Do you plan to implement SCCP support as well? |
Yeah. I will post a patch later. |
This patch infers `nneg` flags for existing zext instructions in SCCP. Similar patch: #72052
This patch infers `nneg` flags for existing zext instructions in CVP. After llvm#71534 and this patch, we can drop `zext -> zext nneg` transform in `RISCVCodeGenPrepare`: https://github.com/llvm/llvm-project/blob/40671bbdefb6ff83e2685576a3cb041b62f25bbe/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp#L74-L83 This is an alternative to llvm#72049.
…lvm#72053) After llvm#71534 and llvm#72052, the transform `zext -> zext nneg` in `RISCVCodeGenPrepare` is redundant.
This patch infers `nneg` flags for existing zext instructions in SCCP. Similar patch: llvm#72052
This patch infers
nneg
flags for existing zext instructions in CVP.After #71534 and this patch, we can drop
zext -> zext nneg
transform inRISCVCodeGenPrepare
:llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
Lines 74 to 83 in 40671bb
This is an alternative to #72049.