[RISCV] Custom legalize i32 SADDO/SSUBO with RV64LegalI32.
The default legalization uses 2 compares and an xor. We can instead
use add+addw+xor+snez like we do without RV64LegalI32.
topperc committed Feb 3, 2024
1 parent d62c570 commit f090924
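
For illustration, here is a rough C++ model (not code from the patch; the function names are hypothetical) of the two expansions the commit message refers to, for signed i32 add overflow on a 64-bit target:

#include <cstdint>

// Default expansion: wrapping 32-bit add, then two compares and an xor
// (corresponds to addw + slt + slti + xor on RV64).
bool saddo_default(int32_t a, int32_t b) {
  int32_t sum = static_cast<int32_t>(static_cast<uint32_t>(a) +
                                     static_cast<uint32_t>(b)); // addw
  return (sum < a) != (b < 0);                                  // slt, slti, xor
}

// New expansion: do the add in 64 bits, then check whether the result still
// fits in i32, i.e. whether it equals its own low 32 bits sign-extended
// (corresponds to add + addw + xor + snez on RV64).
bool saddo_wide(int32_t a, int32_t b) {
  int64_t wide = static_cast<int64_t>(a) + static_cast<int64_t>(b); // add
  int32_t narrow = static_cast<int32_t>(wide);                      // addw
  return wide != static_cast<int64_t>(narrow);                      // xor + snez
}

The SSUBO case follows the same pattern with the adds replaced by subtracts (sub/subw).
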
Showing 2 changed files with 50 additions and 30 deletions.
28 changes: 26 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -278,10 +278,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
MVT::i32, Custom);
}
} else
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i32, Custom);
} else {
setLibcallName(
{RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -5354,6 +5355,26 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
return Op;
}

// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
"Unexpected custom legalisation");
if (isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();

bool IsAdd = Op.getOpcode() == ISD::SADDO;
SDLoc DL(Op);
SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
SDValue WideOp = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
DAG.getValueType(MVT::i32));
SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
ISD::SETNE);
return DAG.getMergeValues({Res, Ovf}, DL);
}

// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
@@ -5873,6 +5894,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
case ISD::SADDO:
case ISD::SSUBO:
return lowerSADDO_SSUBO(Op, DAG);
case ISD::SMULO:
return lowerSMULO(Op, DAG);
case ISD::SHL_PARTS:
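
The overflow test built by lowerSADDO_SSUBO above relies on the identity that an i32 operation overflows exactly when its exact i64 result differs from that result sign-extended from bit 31 (the SIGN_EXTEND_INREG/SETNE pair). A minimal standalone sketch of the subtract case, with hypothetical names and a few spot checks (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

// Models the check lowerSADDO_SSUBO emits for SSUBO: compute the exact
// 64-bit difference and compare it against its value sign-extended from
// bit 31; a mismatch means the i32 subtract overflowed.
bool ssubo_wide(int32_t a, int32_t b) {
  int64_t wide = static_cast<int64_t>(a) - static_cast<int64_t>(b); // sub
  int32_t narrow = static_cast<int32_t>(wide);                      // subw
  return wide != static_cast<int64_t>(narrow);                      // xor + snez
}

int main() {
  assert(!ssubo_wide(5, 3));         // 2 fits in i32
  assert(ssubo_wide(INT32_MIN, 1));  // wraps below INT32_MIN
  assert(ssubo_wide(INT32_MAX, -1)); // wraps above INT32_MAX
  return 0;
}

The updated check lines in the test file below show the corresponding subw/sub/xor/snez sequences.
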
52 changes: 24 additions & 28 deletions llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -9,10 +9,10 @@ define zeroext i1 @saddo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
; RV64-LABEL: saddo1.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addw a3, a0, a1
; RV64-NEXT: slt a0, a3, a0
; RV64-NEXT: slti a1, a1, 0
; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: sw a3, 0(a2)
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: xor a3, a1, a3
; RV64-NEXT: snez a0, a3
; RV64-NEXT: sw a1, 0(a2)
; RV64-NEXT: ret
entry:
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -199,10 +199,10 @@ entry:
define zeroext i1 @ssubo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
; RV64-LABEL: ssubo1.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sgtz a3, a1
; RV64-NEXT: subw a1, a0, a1
; RV64-NEXT: slt a0, a1, a0
; RV64-NEXT: xor a0, a3, a0
; RV64-NEXT: subw a3, a0, a1
; RV64-NEXT: sub a1, a0, a1
; RV64-NEXT: xor a3, a1, a3
; RV64-NEXT: snez a0, a3
; RV64-NEXT: sw a1, 0(a2)
; RV64-NEXT: ret
entry:
@@ -479,8 +479,7 @@ define i32 @saddo.select.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: saddo.select.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addw a2, a0, a1
; RV64-NEXT: slt a2, a2, a0
; RV64-NEXT: slti a3, a1, 0
; RV64-NEXT: add a3, a0, a1
; RV64-NEXT: bne a3, a2, .LBB28_2
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
@@ -497,9 +496,9 @@ define i1 @saddo.not.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: saddo.not.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addw a2, a0, a1
; RV64-NEXT: slt a0, a2, a0
; RV64-NEXT: slti a1, a1, 0
; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: xor a0, a0, a2
; RV64-NEXT: snez a0, a0
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
entry:
@@ -606,10 +605,9 @@ entry:
define i32 @ssubo.select.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: ssubo.select.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sgtz a2, a1
; RV64-NEXT: subw a3, a0, a1
; RV64-NEXT: slt a3, a3, a0
; RV64-NEXT: bne a2, a3, .LBB36_2
; RV64-NEXT: subw a2, a0, a1
; RV64-NEXT: sub a3, a0, a1
; RV64-NEXT: bne a3, a2, .LBB36_2
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB36_2: # %entry
@@ -624,10 +622,10 @@ define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: ssubo.not.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sgtz a2, a1
; RV64-NEXT: subw a1, a0, a1
; RV64-NEXT: slt a0, a1, a0
; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: subw a2, a0, a1
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: xor a0, a0, a2
; RV64-NEXT: snez a0, a0
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
entry:
@@ -873,9 +871,8 @@ define zeroext i1 @saddo.br.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: saddo.br.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addw a2, a0, a1
; RV64-NEXT: slt a0, a2, a0
; RV64-NEXT: slti a1, a1, 0
; RV64-NEXT: beq a1, a0, .LBB52_2
; RV64-NEXT: add a0, a0, a1
; RV64-NEXT: beq a0, a2, .LBB52_2
; RV64-NEXT: # %bb.1: # %overflow
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
@@ -973,10 +970,9 @@ define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
; RV64-LABEL: ssubo.br.i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: sgtz a2, a1
; RV64-NEXT: subw a1, a0, a1
; RV64-NEXT: slt a0, a1, a0
; RV64-NEXT: beq a2, a0, .LBB56_2
; RV64-NEXT: subw a2, a0, a1
; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: beq a0, a2, .LBB56_2
; RV64-NEXT: # %bb.1: # %overflow
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
