-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[PowerPC] Add custom lowering for SADD overflow for i32 and i64 #159255
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-powerpc Author: Aditi Medhane (AditiRM) Changes: This patch improves the codegen for the saddo node for i32 and i64 in 32-bit and 64-bit mode by custom lowering. Testcase: > Assembly comparison (test_sadd_overflow):
Assembly comparison (test_saddl_overflow):
Full diff: https://github.com/llvm/llvm-project/pull/159255.diff 3 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..0798dae3a14a2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,8 +204,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// setbc instruction.
if (!Subtarget.hasP10Vector()) {
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- if (isPPC64)
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ if (isPPC64){
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ }
}
// Match BITREVERSE to customized fast code sequence in the td file.
@@ -12614,6 +12617,31 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
}
+SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
+ // Custom lowering for ISD::SADDO: produces {sum, overflow flag} using only add/xor/and/shift nodes.
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ EVT VT = Op.getNode()->getValueType(0);
+ // First result: the sum itself, emitted as an ordinary ISD::ADD in the operand type.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
+ // Signed overflow happens iff LHS and RHS have the same sign and the sum has a different sign than LHS.
+ SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue NotXor1 = DAG.getNOT(dl, Xor1, VT);
+ SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Add, LHS);
+ // The sign bit of ~(LHS ^ RHS) & (Add ^ LHS) is set exactly in that case.
+ SDValue And = DAG.getNode(ISD::AND, dl, VT, NotXor1, Xor2);
+ // Move the sign bit down to bit 0 with a logical right shift by (bit width - 1), leaving 0 or 1.
+ SDValue Overflow =
+ DAG.getNode(ISD::SRL, dl, VT, And,
+ DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
+ // Narrow the 0/1 value to the type of the node's second (overflow) result.
+ SDValue OverflowTrunc =
+ DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+ // Hand back both results of the SADDO node as one merged value.
+ return DAG.getMergeValues({Add, OverflowTrunc}, dl);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12638,6 +12666,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::SSUBO:
return LowerSSUBO(Op, DAG);
+ case ISD::SADDO:
+ return LowerSADDO(Op, DAG);
case ISD::INLINEASM:
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..0c19632ab5b33 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1283,6 +1283,7 @@ namespace llvm {
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSADDO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index c0f3b60122521..5cd96ec219404 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -49,12 +49,11 @@ entry:
define i1 @test_saddo_i32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: test_saddo_i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: add 5, 3, 4
-; CHECK-NEXT: cmpwi 1, 4, 0
-; CHECK-NEXT: cmpw 5, 3
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: creqv 20, 4, 0
-; CHECK-NEXT: isel 3, 0, 3, 20
+; CHECK-NEXT: xor 5, 3, 4
+; CHECK-NEXT: add 4, 3, 4
+; CHECK-NEXT: xor 3, 4, 3
+; CHECK-NEXT: andc 3, 3, 5
+; CHECK-NEXT: srwi 3, 3, 31
; CHECK-NEXT: blr
entry:
%res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -65,12 +64,11 @@ entry:
define i1 @test_saddo_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: test_saddo_i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: add 5, 3, 4
-; CHECK-NEXT: cmpdi 1, 4, 0
-; CHECK-NEXT: cmpd 5, 3
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: creqv 20, 4, 0
-; CHECK-NEXT: isel 3, 0, 3, 20
+; CHECK-NEXT: xor 5, 3, 4
+; CHECK-NEXT: add 4, 3, 4
+; CHECK-NEXT: xor 3, 4, 3
+; CHECK-NEXT: andc 3, 3, 5
+; CHECK-NEXT: rldicl 3, 3, 1, 63
; CHECK-NEXT: blr
entry:
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
This patch improves the codegen for the saddo node for i32 and i64 in both 32-bit and 64-bit mode by custom lowering.