-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SelectionDAG]: Have isKnownNeverZero treat SRL like division if all else fails #89523
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: AtariDreams (AtariDreams) ChangesFull diff: https://github.com/llvm/llvm-project/pull/89523.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7dbf83b7adeef0..3b8368a9a73b91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5443,10 +5443,17 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (ValKnown.isNegative())
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
- APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+ const KnownBits Shift = computeKnownBits(Op.getOperand(1), Depth + 1);
+ APInt MaxCnt = Shift.getMaxValue();
if (MaxCnt.ult(ValKnown.getBitWidth()) &&
!ValKnown.One.lshr(MaxCnt).isZero())
return true;
+ // We try to see if we can turn it into a division
+ const KnownBits One =
+ KnownBits::makeConstant(APInt(ValKnown.getBitWidth(), 1));
+ if (KnownBits::uge(ValKnown,
+ KnownBits::lshr(One, Shift, Shift.isNonZero())))
+ return true;
break;
}
case ISD::UDIV:
@@ -5455,6 +5462,10 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
// TODO: For udiv this is also true if Op1 u<= Op0
if (Op->getFlags().hasExact())
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ if (KnownBits::uge(computeKnownBits(Op.getOperand(1), Depth + 1),
+ computeKnownBits(Op.getOperand(0), Depth + 1)))
+ return true;
break;
case ISD::ADD:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index e183bbc15617d5..31b4731613def9 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -118,12 +118,13 @@ define i1 @pow2_srl_fail0(i32 %x, i32 %y) {
; CHECK-LABEL: pow2_srl_fail0:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $30, %cl
-; CHECK-NEXT: notl %edi
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi
-; CHECK-NEXT: testl $1048576, %edi # imm = 0x100000
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: shll %cl, %eax
+; CHECK-NEXT: shrl $20, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%yy = and i32 %y, 30
%d = lshr i32 1048576, %yy
@@ -349,9 +350,8 @@ define i1 @pow2_umax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmoval %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -482,9 +482,8 @@ define i1 @pow2_smax_fail0(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: cmpl %esi, %eax
; CHECK-NEXT: cmovgl %eax, %esi
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %esi
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %esi, %edi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -555,9 +554,8 @@ define i1 @pow2_select_fail0(i1 %c, i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: shrl %cl, %r8d
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: cmovnel %edx, %r8d
-; CHECK-NEXT: notl %esi
-; CHECK-NEXT: testl %esi, %r8d
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: testl %r8d, %esi
+; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%yy = shl i32 1, %y
%zz = lshr i32 1073741824, %z
@@ -696,10 +694,9 @@ define <4 x i1> @pow2_vselect_fail0_ne(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y,
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: pandn %xmm7, %xmm0
; CHECK-NEXT: por %xmm2, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%zz = lshr <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>, %z
|
f315ede
to
fed8480
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
1f91c46
to
1611a27
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you try to more closely match the ValueTracking.cpp implementation which tries to do the same thing - I don't think you need to bother adding a isNonZeroShift helper but you should be trying to use the same methodology.
What cases does this handle that were not handled already? Intuitively it seems like |
No description provided.