-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PowerPC] Combine sub within setcc back to sext #66978
base: main
Are you sure you want to change the base?
Conversation
if (LHS.getOpcode() == ISD::ADD && isa<ConstantSDNode>(LHS.getOperand(1))) { | ||
uint64_t Addend = cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue(); | ||
if (OpVT == MVT::i64) { | ||
// (a-2^(M-1)) => sext(trunc(a, M), 64) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When is this equation hold?
Please provide more description in PR summary. |
The patterns are ; ult (add x -0x80000000) -0x100000000 -> ne x (sext:i64 (trunc:i32 x))
; ult (add x -0x8000) -0x10000 -> ne x (sext:i64 (trunc:i16 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i64 (trunc:i8 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i32 (trunc:i16 x))
; ult (add x -0x80) -0x100 -> ne x (sext:i16 (trunc:i8 x))
; ult (srl (add x -0x8000) 16) 0xffff -> ne x (sext:i32 (trunc:i16 x)) Only the last one differs from the rest, because it's been transformed in generic DAG combiner. (https://alive2.llvm.org/ce/z/jAHgKM ) The rest are just reverting transformation of InstCombine. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
; CHECK-NEXT: xor 3, 4, 3 | ||
; CHECK-NEXT: cntlzw 3, 3 | ||
; CHECK-NEXT: srwi 3, 3, 5 | ||
; CHECK-NEXT: xori 3, 3, 1 | ||
; CHECK-NEXT: blr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will investigate about the regression.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should not be regression. Parameter should have signext
.
@llvm/pr-subscribers-backend-powerpc Author: Qiu Chaofan (ecnelises) ChangesInstCombine does below transformation: ; bool cmp(i64 a) { return a != (i64)(iN)a; }
%0 = add i64 %a, -2^(N-1)
%1 = icmp ult i64 %0, -2^N
ret i1 %1
; bool cmp(i64 a) { return a == (i64)(iN)a; }
%0 = add i64 %a, 2^(N-1)
%1 = icmp ult i64 %0, 2^N
ret i1 %1 On PowerPC, it's profitable to combine this add-cmpult back to original trunc-ext-cmpeq. Full diff: https://github.com/llvm/llvm-project/pull/66978.diff 2 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 43e4a34a9b3483..b13d603efb9479 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14527,15 +14527,18 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
-SDValue PPCTargetLowering::combineSetCC(SDNode *N,
- DAGCombinerInfo &DCI) const {
- assert(N->getOpcode() == ISD::SETCC &&
- "Should be called with a SETCC node");
+SDValue PPCTargetLowering::combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc DL(N);
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ EVT OpVT = LHS.getValueType();
+
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
// If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
@@ -14546,15 +14549,49 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
// x != 0-y --> x+y != 0
if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
RHS.hasOneUse()) {
- SDLoc DL(N);
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
- EVT OpVT = LHS.getValueType();
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
}
+ if (CC == ISD::SETULT) {
+ auto GetTruncExtCmp = [&](SDValue Src, EVT DstVT) {
+ return DAG.getSetCC(
+ DL, VT, Src,
+ DAG.getSExtOrTrunc(DAG.getSExtOrTrunc(Src, DL, DstVT), DL, OpVT),
+ ISD::SETNE);
+ };
+ // ult (add x -0x80000000) -0x100000000 -> ne x (sext:i64 (trunc:i32 x))
+ // ult (add x -0x8000) -0x10000 -> ne x (sext:i64 (trunc:i16 x))
+ // ult (add x -0x80) -0x100 -> ne x (sext:i64/i32/i16 (trunc:i8/i16/i8 x))
+ if (LHS.getOpcode() == ISD::ADD) {
+ const auto *Addend = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const auto *RhsC = dyn_cast<ConstantSDNode>(RHS);
+ if (Addend && RhsC) {
+ int64_t AddendVal = Addend->getSExtValue();
+ int64_t RhsVal = RhsC->getSExtValue();
+ if (AddendVal == -0x80000000L && RhsVal == -0x100000000L &&
+ OpVT == MVT::i64)
+ return GetTruncExtCmp(LHS.getOperand(0), MVT::i32);
+ if (AddendVal == -0x8000 && RhsVal == -0x10000 && OpVT == MVT::i64)
+ return GetTruncExtCmp(LHS.getOperand(0), MVT::i16);
+ if (AddendVal == -0x80 && RhsVal == -0x100 &&
+ (OpVT == MVT::i64 || OpVT == MVT::i32 || OpVT == MVT::i16))
+ return GetTruncExtCmp(LHS.getOperand(0), MVT::i8);
+ }
+ // ult (srl (add x -0x8000) 16) 0xffff -> ne x (sext:i32 (trunc:i16 x))
+ } else if (LHS.getOpcode() == ISD::SRL &&
+ LHS.getOperand(0).getOpcode() == ISD::ADD) {
+ const auto *SrlAmt = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const auto *Addend =
+ dyn_cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1));
+ const auto *RhsC = dyn_cast<ConstantSDNode>(RHS);
+ if (SrlAmt && Addend && RhsC && SrlAmt->getSExtValue() == 16 &&
+ Addend->getSExtValue() == -0x8000 && RhsC->getSExtValue() == 0xffff)
+ return GetTruncExtCmp(LHS.getOperand(0).getOperand(0), MVT::i8);
+ }
+ }
+
return DAGCombineTruncBoolExt(N, DCI);
}
diff --git a/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll b/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
index 20dcb8ccf4908a..cb6170ab175e7e 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-to-sub.ll
@@ -92,12 +92,10 @@ entry:
define zeroext i1 @test5(i64 %a) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: addis 3, 3, -32768
-; CHECK-NEXT: rldic 4, 4, 32, 0
-; CHECK-NEXT: subc 4, 3, 4
-; CHECK-NEXT: subfe 3, 3, 3
-; CHECK-NEXT: neg 3, 3
+; CHECK-NEXT: extsw 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: addic 4, 3, -1
+; CHECK-NEXT: subfe 3, 4, 3
; CHECK-NEXT: blr
entry:
%0 = add i64 %a, -2147483648
@@ -108,11 +106,10 @@ entry:
define zeroext i1 @test6(i64 %a) {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi 3, 3, -32768
-; CHECK-NEXT: lis 4, -1
-; CHECK-NEXT: subc 4, 3, 4
-; CHECK-NEXT: subfe 3, 3, 3
-; CHECK-NEXT: neg 3, 3
+; CHECK-NEXT: extsh 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: addic 4, 3, -1
+; CHECK-NEXT: subfe 3, 4, 3
; CHECK-NEXT: blr
entry:
%0 = add i64 %a, -32768
@@ -123,11 +120,10 @@ entry:
define zeroext i1 @test7(i64 %a) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi 3, 3, -128
-; CHECK-NEXT: li 4, -256
-; CHECK-NEXT: subc 4, 3, 4
-; CHECK-NEXT: subfe 3, 3, 3
-; CHECK-NEXT: neg 3, 3
+; CHECK-NEXT: extsb 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: addic 4, 3, -1
+; CHECK-NEXT: subfe 3, 4, 3
; CHECK-NEXT: blr
entry:
%0 = add i64 %a, -128
@@ -135,15 +131,14 @@ entry:
ret i1 %cmp
}
-define zeroext i1 @test8(i32 %a) {
+define zeroext i1 @test8(i32 signext %a) {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi 3, 3, -32768
-; CHECK-NEXT: lis 4, -1
-; CHECK-NEXT: rlwinm 3, 3, 16, 16, 31
-; CHECK-NEXT: ori 4, 4, 1
-; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: rldicl 3, 3, 1, 63
+; CHECK-NEXT: extsb 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
entry:
%0 = add i32 %a, -32768
@@ -151,16 +146,14 @@ entry:
ret i1 %cmp
}
-define zeroext i1 @test9(i32 %a) {
+define zeroext i1 @test9(i32 signext %a) {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lis 4, -256
-; CHECK-NEXT: addi 3, 3, -128
-; CHECK-NEXT: ori 4, 4, 1
-; CHECK-NEXT: clrldi 3, 3, 32
-; CHECK-NEXT: rldic 4, 4, 8, 0
-; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: rldicl 3, 3, 1, 63
+; CHECK-NEXT: extsb 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
entry:
%0 = add i32 %a, -128
@@ -168,15 +161,14 @@ entry:
ret i1 %cmp
}
-define zeroext i1 @test10(i16 %a) {
+define zeroext i1 @test10(i16 signext %a) {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi 3, 3, -128
-; CHECK-NEXT: lis 4, -1
-; CHECK-NEXT: clrlwi 3, 3, 16
-; CHECK-NEXT: ori 4, 4, 256
-; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: rldicl 3, 3, 1, 63
+; CHECK-NEXT: extsb 4, 3
+; CHECK-NEXT: xor 3, 3, 4
+; CHECK-NEXT: cntlzw 3, 3
+; CHECK-NEXT: srwi 3, 3, 5
+; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr
entry:
%0 = add i16 %a, -128
|
InstCombine does below transformation:
On PowerPC, it's profitable to combine this add-cmpult back to original trunc-ext-cmpeq.