[RISCV] Teach isel to select ADDW/SUBW/MULW/SLLIW when only the lower 32 bits are used.

We normally select these when the root node is a sext_inreg, but
SimplifyDemandedBits can sometimes bypass the sext_inreg for some
users. This can create a situation where sext_inreg+add/sub/mul/shl
is selected to a W instruction, and then the add/sub/mul/shl is
separately selected to a non-W instruction with the same inputs.

This patch tries to detect when it would still be ok to use a W
instruction without the sext_inreg by checking the direct users.
This can allow the W instruction to CSE with one created for a
sext_inreg+add/sub/mul/shl. To minimize complexity and cost of
checking, we make no attempt to determine if the CSE will happen
and just always use a W instruction when we can.
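
As a minimal sketch of the situation (IR adapted from the
add32_sext_reject_on_rv64 test updated below; the function name @f is
only illustrative):

@gv0 = external global i32

define signext i32 @f(i32 signext %a) nounwind {
  %b = add nsw i32 %a, 3000
  store i32 %b, i32* @gv0, align 4
  ret i32 %b
}

The signext return keeps a sext_inreg over the add, which selects ADDW;
SimplifyDemandedBits lets the store bypass the sext_inreg, so the value
feeding the SW previously selected a separate non-W ADD with the same
inputs. With this change that add also selects ADDW and can CSE with
the first one.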

Differential Revision: https://reviews.llvm.org/D107658
topperc committed Aug 18, 2021
1 parent 6cc1109 commit d9ba1a9
Showing 33 changed files with 692 additions and 610 deletions.
82 changes: 82 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1496,6 +1496,88 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
  return false;
}

// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
// TODO: Does this need to look through AND/OR/XOR to their users to find more
// opportunities?
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL) &&
         "Unexpected opcode");

  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      return false;
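    // These W-form instructions only consume the lower 32 bits of their inputs.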
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLIUW:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ADDUW:
    case RISCV::SH1ADDUW:
    case RISCV::SH2ADDUW:
    case RISCV::SH3ADDUW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
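    // SB/SH/SW only read the lowest 8/16/32 bits of the stored value (operand 0).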
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -58,6 +58,9 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
  bool selectSExti32(SDValue N, SDValue &Val);
  bool selectZExti32(SDValue N, SDValue &Val);

  bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
  bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }

  bool selectVLOp(SDValue N, SDValue &VL);

  bool selectVSplat(SDValue N, SDValue &SplatVal);
15 changes: 15 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1254,6 +1254,14 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
          (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
}

// PatFrag to allow ADDW/SUBW/MULW/SLLIW to be selected from i64 add/sub/mul/shl
// if only the lower 32 bits of their result are used.
class overflowingbinopw<SDPatternOperator operator>
    : PatFrag<(ops node:$lhs, node:$rhs),
              (operator node:$lhs, node:$rhs), [{
  return hasAllWUsers(Node);
}]>;

let Predicates = [IsRV64] in {

/// sext and zext
@@ -1283,6 +1291,13 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;

// Select W instructions without sext_inreg if only the lower 32 bits of the
// result are used.
def : PatGprGpr<overflowingbinopw<add>, ADDW>;
def : PatGprSimm12<overflowingbinopw<add>, ADDIW>;
def : PatGprGpr<overflowingbinopw<sub>, SUBW>;
def : PatGprImm<overflowingbinopw<shl>, SLLIW, uimm5>;

/// Loads

defm : LdPat<sextloadi32, LW, i64>;
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -75,6 +75,10 @@ let Predicates = [HasStdExtM, IsRV64] in {
def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
          (MULW GPR:$rs1, GPR:$rs2)>;

// Select W instructions without sext_inreg if only the lower 32 bits of the
// result are used.
def : PatGprGpr<overflowingbinopw<mul>, MULW>;

def : PatGprGpr<riscv_divw, DIVW>;
def : PatGprGpr<riscv_divuw, DIVUW>;
def : PatGprGpr<riscv_remuw, REMUW>;
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -21,7 +21,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
;
; RV64I-LABEL: add_small_const:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a0, a0, 1
; RV64I-NEXT: addiw a0, a0, 1
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: jalr zero, 0(ra)
@@ -35,7 +35,7 @@ define signext i32 @add_small_const(i32 signext %a) nounwind {
;
; RV64C-LABEL: add_small_const:
; RV64C: # %bb.0:
; RV64C-NEXT: c.addi a0, 1
; RV64C-NEXT: c.addiw a0, 1
; RV64C-NEXT: c.slli a0, 56
; RV64C-NEXT: c.srai a0, 56
; RV64C-NEXT: c.jr ra
@@ -75,7 +75,7 @@ define signext i32 @add_large_const(i32 signext %a) nounwind {
; RV64C: # %bb.0:
; RV64C-NEXT: c.lui a1, 1
; RV64C-NEXT: c.addiw a1, -1
; RV64C-NEXT: c.add a0, a1
; RV64C-NEXT: c.addw a0, a1
; RV64C-NEXT: c.slli a0, 48
; RV64C-NEXT: c.srai a0, 48
; RV64C-NEXT: c.jr ra
@@ -115,7 +115,7 @@ define signext i32 @add_huge_const(i32 signext %a) nounwind {
; RV64C: # %bb.0:
; RV64C-NEXT: c.lui a1, 8
; RV64C-NEXT: c.addiw a1, -1
; RV64C-NEXT: c.add a0, a1
; RV64C-NEXT: c.addw a0, a1
; RV64C-NEXT: c.slli a0, 48
; RV64C-NEXT: c.srai a0, 48
; RV64C-NEXT: c.jr ra
@@ -135,7 +135,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
;
; RV64I-LABEL: add_non_machine_type:
; RV64I: # %bb.0:
; RV64I-NEXT: addi a0, a0, 256
; RV64I-NEXT: addiw a0, a0, 256
; RV64I-NEXT: slli a0, a0, 52
; RV64I-NEXT: srai a0, a0, 40
; RV64I-NEXT: jalr zero, 0(ra)
@@ -149,7 +149,7 @@ define signext i24 @add_non_machine_type(i24 signext %a) nounwind {
;
; RV64C-LABEL: add_non_machine_type:
; RV64C: # %bb.0:
; RV64C-NEXT: addi a0, a0, 256
; RV64C-NEXT: addiw a0, a0, 256
; RV64C-NEXT: c.slli a0, 52
; RV64C-NEXT: c.srai a0, 40
; RV64C-NEXT: c.jr ra
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/RISCV/add-imm.ll
@@ -180,10 +180,9 @@ define signext i32 @add32_sext_reject_on_rv64(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 1
; RV64I-NEXT: addiw a1, a1, -1096
; RV64I-NEXT: add a2, a0, a1
; RV64I-NEXT: lui a3, %hi(gv0)
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: sw a2, %lo(gv0)(a3)
; RV64I-NEXT: lui a1, %hi(gv0)
; RV64I-NEXT: sw a0, %lo(gv0)(a1)
; RV64I-NEXT: ret
%b = add nsw i32 %a, 3000
store i32 %b, i32* @gv0, align 4
@@ -234,8 +233,8 @@ define void @add32_reject() nounwind {
; RV64I-NEXT: lw a3, %lo(gb)(a2)
; RV64I-NEXT: lui a4, 1
; RV64I-NEXT: addiw a4, a4, -1096
; RV64I-NEXT: add a1, a1, a4
; RV64I-NEXT: add a3, a3, a4
; RV64I-NEXT: addw a1, a1, a4
; RV64I-NEXT: addw a3, a3, a4
; RV64I-NEXT: sw a1, %lo(ga)(a0)
; RV64I-NEXT: sw a3, %lo(gb)(a2)
; RV64I-NEXT: ret
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -18,7 +18,7 @@ define signext i32 @add_mul_trans_accept_1(i32 %x) {
; RV64IM-LABEL: add_mul_trans_accept_1:
; RV64IM: # %bb.0:
; RV64IM-NEXT: addi a1, zero, 11
; RV64IM-NEXT: mul a0, a0, a1
; RV64IM-NEXT: mulw a0, a0, a1
; RV64IM-NEXT: addiw a0, a0, 407
; RV64IM-NEXT: ret
%tmp0 = add i32 %x, 37
Expand All @@ -39,7 +39,7 @@ define signext i32 @add_mul_trans_accept_2(i32 %x) {
; RV64IM-LABEL: add_mul_trans_accept_2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: addi a1, zero, 13
; RV64IM-NEXT: mul a0, a0, a1
; RV64IM-NEXT: mulw a0, a0, a1
; RV64IM-NEXT: lui a1, 28
; RV64IM-NEXT: addiw a1, a1, 1701
; RV64IM-NEXT: addw a0, a0, a1
Expand All @@ -62,7 +62,7 @@ define signext i32 @add_mul_trans_reject_1(i32 %x) {
; RV64IM-LABEL: add_mul_trans_reject_1:
; RV64IM: # %bb.0:
; RV64IM-NEXT: addi a1, zero, 19
; RV64IM-NEXT: mul a0, a0, a1
; RV64IM-NEXT: mulw a0, a0, a1
; RV64IM-NEXT: lui a1, 9
; RV64IM-NEXT: addiw a1, a1, 585
; RV64IM-NEXT: addw a0, a0, a1
Expand All @@ -87,7 +87,7 @@ define signext i32 @add_mul_trans_reject_2(i32 %x) {
; RV64IM: # %bb.0:
; RV64IM-NEXT: lui a1, 792
; RV64IM-NEXT: addiw a1, a1, -1709
; RV64IM-NEXT: mul a0, a0, a1
; RV64IM-NEXT: mulw a0, a0, a1
; RV64IM-NEXT: lui a1, 1014660
; RV64IM-NEXT: addiw a1, a1, -1891
; RV64IM-NEXT: addw a0, a0, a1
