Skip to content

Conversation

AditiRM
Copy link
Member

@AditiRM AditiRM commented Sep 17, 2025

This patch is to improve the codegen for saddo node for i32 & i64 in 32-bit and 64-bit mode by custom lowering.

@llvmbot
Copy link
Member

llvmbot commented Sep 17, 2025

@llvm/pr-subscribers-backend-powerpc

Author: Aditi Medhane (AditiRM)

Changes

This patch is to improve the codegen for saddo node for i32 & i64 in 32-bit and 64-bit mode by custom lowering.

Testcase :
> c > #include <stdbool.h> > #include <stdint.h> > > bool test_sadd_overflow(int a, int b, int *c) { > return __builtin_sadd_overflow(a, b, c); > } > > bool test_saddl_overflow(long long a, long long b, long long *c){ > return __builtin_saddll_overflow(a, b, c); > } >

> bash > ibm-clang -O2 -mcpu=power8 -m64 -S test.c -o test_final.s >

Assembly comparison (test_sadd_overflow):

Before After
add 6, 3, 4<br>srwi 4, 4, 31<br>extsw 6, 6<br>sub 3, 6, 3<br>stw 6, 0(5)<br>rldicl 3, 3, 1, 63<br>xor 3, 4, 3 xor 6, 3, 4<br>add 4, 3, 4<br>xor 3, 4, 3<br>stw 4, 0(5)<br>andc 3, 3, 6<br>rlwinm 3, 3, 1, 31, 31

Assembly comparison (test_saddl_overflow):

Before After
add 6, 3, 4<br>rldicl 8, 3, 1, 63<br>rldicl 4, 4, 1, 63<br>std 6, 0(5)<br>sradi 7, 6, 63<br>subc 3, 6, 3<br>adde 3, 8, 7<br>xori 3, 3, 1<br>xor 3, 4, 3 xor 6, 3, 4<br>add 4, 3, 4<br>xor 3, 4, 3<br>std 4, 0(5)<br>andc 3, 3, 6<br>rldicl 3, 3, 1, 63

Full diff: https://github.com/llvm/llvm-project/pull/159255.diff

3 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+31-1)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+1)
  • (modified) llvm/test/CodeGen/PowerPC/saddo-ssubo.ll (+10-12)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..0798dae3a14a2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,8 +204,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // setbc instruction.
   if (!Subtarget.hasP10Vector()) {
     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
-    if (isPPC64)
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
+    if (isPPC64){
       setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+      setOperationAction(ISD::SADDO, MVT::i64, Custom);
+    }
   }
 
   // Match BITREVERSE to customized fast code sequence in the td file.
@@ -12614,6 +12617,31 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
 }
 
+SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
+
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  EVT VT = Op.getNode()->getValueType(0);
+
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
+
+  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+  SDValue NotXor1 = DAG.getNOT(dl, Xor1, VT);
+  SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Add, LHS);
+
+  SDValue And = DAG.getNode(ISD::AND, dl, VT, NotXor1, Xor2);
+
+  SDValue Overflow =
+      DAG.getNode(ISD::SRL, dl, VT, And,
+                  DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
+
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+
+  return DAG.getMergeValues({Add, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12638,6 +12666,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::SSUBO:
     return LowerSSUBO(Op, DAG);
+  case ISD::SADDO:
+    return LowerSADDO(Op, DAG);
 
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..0c19632ab5b33 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1283,6 +1283,7 @@ namespace llvm {
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSADDO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index c0f3b60122521..5cd96ec219404 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -49,12 +49,11 @@ entry:
 define i1 @test_saddo_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    srwi 3, 3, 31
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -65,12 +64,11 @@ entry:
 define i1 @test_saddo_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpdi 1, 4, 0
-; CHECK-NEXT:    cmpd 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    rldicl 3, 3, 1, 63
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind

Copy link

github-actions bot commented Sep 17, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants