diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index fa104e4f69d7f..4e4e9a9ab8a74 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -204,8 +204,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   // setbc instruction.
   if (!Subtarget.hasP10Vector()) {
     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
-    if (isPPC64)
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
+    if (isPPC64) {
       setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+      setOperationAction(ISD::SADDO, MVT::i64, Custom);
+    }
   }
 
   // Match BITREVERSE to customized fast code sequence in the td file.
@@ -12614,6 +12617,31 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
 }
 
+SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
+
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  EVT VT = Op.getNode()->getValueType(0);
+
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
+
+  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+  SDValue NotXor1 = DAG.getNOT(dl, Xor1, VT);
+  SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Add, LHS);
+
+  SDValue And = DAG.getNode(ISD::AND, dl, VT, NotXor1, Xor2);
+
+  SDValue Overflow =
+      DAG.getNode(ISD::SRL, dl, VT, And,
+                  DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
+
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+
+  return DAG.getMergeValues({Add, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12638,6 +12666,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::SSUBO:
     return LowerSSUBO(Op, DAG);
+  case ISD::SADDO:
+    return LowerSADDO(Op, DAG);
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR:
     return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 669430550f4e6..0c19632ab5b33 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1283,6 +1283,7 @@ namespace llvm {
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSADDO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index c0f3b60122521..5cd96ec219404 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -49,12 +49,11 @@ entry:
 define i1 @test_saddo_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    srwi 3, 3, 31
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -65,12 +64,11 @@ entry:
 define i1 @test_saddo_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test_saddo_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    add 5, 3, 4
-; CHECK-NEXT:    cmpdi 1, 4, 0
-; CHECK-NEXT:    cmpd 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 4, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    xor 5, 3, 4
+; CHECK-NEXT:    add 4, 3, 4
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    andc 3, 3, 5
+; CHECK-NEXT:    rldicl 3, 3, 1, 63
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
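
For readers tracing the new LowerSADDO node sequence: the overflow flag is the classic two's-complement identity, a signed add overflows exactly when the operands share a sign and the sum's sign differs from theirs, i.e. ((~(LHS ^ RHS)) & ((LHS + RHS) ^ LHS)) >> (bits - 1). Below is a minimal stand-alone C++ sketch of that same bit trick; the saddOverflows helper and its test values are illustrative only, not part of the patch.

#include <cassert>
#include <cstdint>
#include <limits>

// Branch-free signed-add overflow test mirroring the DAG nodes built in
// LowerSADDO: wrap-around add, then ~(LHS ^ RHS) & (Sum ^ LHS), then take
// the sign bit. Illustrative helper; the names are not from the patch.
static bool saddOverflows(int32_t LHS, int32_t RHS) {
  uint32_t A = static_cast<uint32_t>(LHS);
  uint32_t B = static_cast<uint32_t>(RHS);
  uint32_t Sum = A + B;                         // ISD::ADD (wrapping)
  uint32_t SameSign = ~(A ^ B);                 // getNOT(XOR(LHS, RHS))
  uint32_t SignChanged = Sum ^ A;               // XOR(Add, LHS)
  return ((SameSign & SignChanged) >> 31) != 0; // SRL by VT.getSizeInBits() - 1
}

int main() {
  int32_t Max = std::numeric_limits<int32_t>::max();
  int32_t Min = std::numeric_limits<int32_t>::min();
  assert(saddOverflows(Max, 1));   // positive + positive wraps negative
  assert(saddOverflows(Min, -1));  // negative + negative wraps positive
  assert(!saddOverflows(Max, -1)); // mixed signs never overflow
  assert(!saddOverflows(-5, 3));
  return 0;
}

The updated CHECK lines are the direct machine form of those steps: xor/xor/andc build the mask, and srwi (rldicl for i64) extracts its sign bit into r3, replacing the previous cmpw/creqv/isel sequence.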