Skip to content

Commit

Permalink
[RISCV][GISel] Add ISel support for SHXADD_UW and SLLI.UW (#69972)
Browse files Browse the repository at this point in the history
This patch also:
  - Removes the legacy non_imm12 PatLeaf from RISCVInstrInfoZb.td
  - Implements a custom GlobalISel operand renderer for the TrailingZeros
    SDNodeXForm
  • Loading branch information
mshockwave committed Oct 24, 2023
1 parent 2f4328e commit cdcaef8
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 23 deletions.
60 changes: 60 additions & 0 deletions llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ class RISCVInstructionSelector : public InstructionSelector {
return selectSHXADDOp(Root, ShAmt);
}

// Complex-operand matcher for the shifted operand of SHXADD_UW, where
// ShAmt is the X in the instruction name. Returns std::nullopt when Root
// does not match; otherwise returns the renderer(s) that add the matched
// operand to the instruction being built.
ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root,
unsigned ShAmt) const;
// Wrapper that takes ShAmt as a template argument and forwards to the
// overload above, so tablegen can name the matcher as
// "selectSHXADD_UWOp<N>".
template <unsigned ShAmt>
ComplexRendererFns selectSHXADD_UWOp(MachineOperand &Root) const {
return selectSHXADD_UWOp(Root, ShAmt);
}

// Custom renderers for tablegen
void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
Expand All @@ -91,6 +98,9 @@ class RISCVInstructionSelector : public InstructionSelector {
MachineRegisterInfo &MRI, RISCVCC::CondCode &CC,
Register &LHS, Register &RHS) const;

// Custom operand renderer: MI is expected to be a G_CONSTANT (with
// OpIdx == -1); the number of trailing zero bits of its zero-extended value
// is added to MIB as an immediate operand.
void renderTrailingZeros(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

const RISCVSubtarget &STI;
const RISCVInstrInfo &TII;
const RISCVRegisterInfo &TRI;
Expand Down Expand Up @@ -239,6 +249,47 @@ RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root,
return std::nullopt;
}

/// Try to match \p Root as the shifted operand of an SHXADD_UW whose shift
/// amount is \p ShAmt.
///
/// The accepted shape is (and (shl x, c2), mask), with both the AND and the
/// SHL having a single non-debug use, where mask (after ignoring its low c2
/// bits) is a shifted mask with exactly 32 - ShAmt leading zeros and c2
/// trailing zeros, and c2 > ShAmt.
///
/// \returns a renderer that emits `SLLI tmp, x, c2 - ShAmt` and adds `tmp` as
/// the operand, or std::nullopt if the pattern does not match.
InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectSHXADD_UWOp(MachineOperand &Root,
                                            unsigned ShAmt) const {
  using namespace llvm::MIPatternMatch;
  MachineFunction &ParentFn = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = ParentFn.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;
  Register AndReg = Root.getReg();

  // Look for (and (shl x, c2), mask); each of the two nodes must have exactly
  // one non-debug use.
  APInt AndMask, ShlAmt;
  Register ShlSrc;
  const bool IsAndOfShl = mi_match(
      AndReg, MRI,
      m_OneNonDBGUse(
          m_GAnd(m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
                 m_ICst(AndMask))));
  if (!IsAndOfShl)
    return std::nullopt;

  // The low c2 bits of the shifted value are zero regardless of the mask, so
  // clear them in the mask before inspecting its shape.
  AndMask &= maskTrailingZeros<uint64_t>(ShlAmt.getLimitedValue());
  if (!AndMask.isShiftedMask())
    return std::nullopt;

  const unsigned LeadingZeros = AndMask.countl_zero();
  const unsigned TrailingZeros = AndMask.countr_zero();
  const bool FitsShXAddUW = LeadingZeros == 32 - ShAmt &&
                            ShlAmt == TrailingZeros && TrailingZeros > ShAmt;
  if (!FitsShXAddUW)
    return std::nullopt;

  // Emit SLLI by (c2 - ShAmt) in front of the instruction being built; the
  // SHXADD_UW itself supplies the remaining ShAmt bits of the shift.
  Register SlliDst = MRI.createGenericVirtualRegister(MRI.getType(AndReg));
  return {{[=](MachineInstrBuilder &MIB) {
    MachineIRBuilder Builder(*MIB.getInstr());
    Builder.buildInstr(RISCV::SLLI, {SlliDst}, {ShlSrc})
        .addImm(ShlAmt.getLimitedValue() - ShAmt);
    MIB.addReg(SlliDst);
  }}};
}

InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectAddrRegImm(MachineOperand &Root) const {
// TODO: Need to get the immediate from a G_PTR_ADD. Should this be done in
Expand Down Expand Up @@ -383,6 +434,15 @@ void RISCVInstructionSelector::renderImm(MachineInstrBuilder &MIB,
MIB.addImm(CstVal);
}

/// Custom operand renderer: add the count of trailing zero bits of the
/// constant defined by \p MI as an immediate operand of \p MIB.
///
/// \p MI must be a G_CONSTANT and \p OpIdx must be -1 (checked by assertion).
void RISCVInstructionSelector::renderTrailingZeros(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  // Operand 1 of the G_CONSTANT carries the constant; read it zero-extended.
  const auto *ConstOperand = MI.getOperand(1).getCImm();
  const uint64_t ConstValue = ConstOperand->getZExtValue();
  const auto NumTrailingZeros = llvm::countr_zero(ConstValue);
  MIB.addImm(NumTrailingZeros);
}

const TargetRegisterClass *RISCVInstructionSelector::getRegClassForTypeOnBank(
LLT Ty, const RegisterBank &RB) const {
if (RB.getID() == RISCV::GPRRegBankID) {
Expand Down
18 changes: 17 additions & 1 deletion llvm/lib/Target/RISCV/RISCVGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def as_i64imm : SDNodeXForm<imm, [{
def gi_as_i64imm : GICustomOperandRenderer<"renderImm">,
GISDNodeXFormEquiv<as_i64imm>;

// GlobalISel equivalent of the TrailingZeros SDNodeXForm: operands using that
// transform are rendered by the C++ hook named "renderTrailingZeros".
def gi_trailing_zero : GICustomOperandRenderer<"renderTrailingZeros">,
GISDNodeXFormEquiv<TrailingZeros>;

// FIXME: This is labelled as handling 's32', however the ComplexPattern it
// refers to handles both i32 and i64 based on the HwMode. Currently this LLT
// parameter appears to be ignored so this pattern works for both, however we
Expand All @@ -73,6 +76,13 @@ def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">,
def gi_sh3add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<3>">,
GIComplexPatternEquiv<sh3add_op>;

// GlobalISel equivalents of the sh{1,2,3}add_uw_op ComplexPatterns. Each one
// is matched by the C++ hook selectSHXADD_UWOp<N>, with N being the shift
// amount in the pattern name.
def gi_sh1add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<1>">,
GIComplexPatternEquiv<sh1add_uw_op>;
def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
GIComplexPatternEquiv<sh2add_uw_op>;
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
GIComplexPatternEquiv<sh3add_uw_op>;

// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
(ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;
Expand Down Expand Up @@ -111,8 +121,14 @@ def : Pat<(i32 (udiv GPR:$rs1, GPR:$rs2)), (DIVUW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i32 (urem GPR:$rs1, GPR:$rs2)), (REMUW GPR:$rs1, GPR:$rs2)>;
}

let Predicates = [HasStdExtZba, IsRV64] in
let Predicates = [HasStdExtZba, IsRV64] in {
// This pattern is put here due to the fact that i32 is not a legal type
// in SDISel for RV64, which is not the case in GISel.
def : Pat<(shl (i64 (zext i32:$rs1)), uimm5:$shamt),
(SLLI_UW GPR:$rs1, uimm5:$shamt)>;

def : Pat<(i64 (zext i32:$rs)), (ADD_UW GPR:$rs, (XLenVT X0))>;
} // Predicates = [HasStdExtZba, IsRV64]

let Predicates = [IsRV64, NotHasStdExtZba] in
def: Pat<(i64 (zext i32:$rs)), (SRLI (SLLI GPR:$rs, 32), 32)>;
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,11 @@ def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
(TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;

// Reuse complex patterns from StdExtZba
def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
def : Pat<(add_non_imm12 sh1add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
def : Pat<(add_non_imm12 sh2add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
def : Pat<(add_non_imm12 sh3add_op:$rs1, (XLenVT GPR:$rs2)),
(TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;

def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
Expand Down
29 changes: 10 additions & 19 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
Original file line number Diff line number Diff line change
Expand Up @@ -230,16 +230,9 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;

// Pattern to exclude simm12 immediates from matching.
// Note: this will be removed once the GISel complex patterns for
// SHXADD_UW is landed.
def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{
auto *C = dyn_cast<ConstantSDNode>(N);
return !C || !isInt<12>(C->getSExtValue());
}]>;

// Pattern to exclude simm12 immediates from matching, namely `non_imm12`.
// GISel currently doesn't support PatFrag for leaf nodes, so `non_imm12`
// cannot be directly supported in GISel. To reuse patterns between the two
// cannot be implemented in that way. To reuse patterns between the two
// ISels, we instead create PatFrag on operators that use `non_imm12`.
class binop_with_non_imm12<SDPatternOperator binop>
: PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{
Expand All @@ -264,12 +257,11 @@ class binop_with_non_imm12<SDPatternOperator binop>
def add_non_imm12 : binop_with_non_imm12<add>;
def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>;

def Shifted32OnesMask : PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
if (!isShiftedMask_64(Imm))
def Shifted32OnesMask : IntImmLeaf<XLenVT, [{
if (!Imm.isShiftedMask())
return false;

unsigned TrailingZeros = llvm::countr_zero(Imm);
unsigned TrailingZeros = Imm.countr_zero();
return TrailingZeros > 0 && TrailingZeros < 32 &&
Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros;
}], TrailingZeros>;
Expand Down Expand Up @@ -776,12 +768,11 @@ def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenV
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;

// More complex cases use a ComplexPattern.
def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)),
(SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)),
(SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)),
(SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
foreach i = {1,2,3} in {
defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op");
def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))),
(!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>;
}

def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))),
(SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>;
Expand Down
99 changes: 99 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,102 @@ body: |
%6:gprb(s64) = G_ADD %5, %1
$x10 = COPY %6(s64)
...
---
name: shXadd_uw_complex_shl_and
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x10, $x11
; CHECK-LABEL: name: shXadd_uw_complex_shl_and
; CHECK: liveins: $x10, $x11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 1
; CHECK-NEXT: [[SH2ADD_UW:%[0-9]+]]:gpr = SH2ADD_UW [[SLLI]], [[COPY1]]
; CHECK-NEXT: $x10 = COPY [[SH2ADD_UW]]
%0:gprb(s64) = COPY $x10
%1:gprb(s64) = COPY $x11
%2:gprb(s64) = G_CONSTANT i64 3
%3:gprb(s64) = G_SHL %0, %2
%4:gprb(s64) = G_CONSTANT i64 17179869183
%5:gprb(s64) = G_AND %3, %4
%6:gprb(s64) = G_ADD %5, %1
$x10 = COPY %6(s64)
...
---
name: slli_uw
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x10
; CHECK-LABEL: name: slli_uw
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[COPY]], 7
; CHECK-NEXT: $x10 = COPY [[SLLI_UW]]
%0:gprb(s64) = COPY $x10
%1:gprb(s64) = G_CONSTANT i64 4294967295
%2:gprb(s64) = G_AND %0, %1
%3:gprb(s64) = G_CONSTANT i64 7
%4:gprb(s64) = G_SHL %2, %3
$x10 = COPY %4(s64)
...
---
name: slli_uw_s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x10
; CHECK-LABEL: name: slli_uw_s32
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[COPY]], 7
; CHECK-NEXT: $x10 = COPY [[SLLI_UW]]
%0:gprb(s64) = COPY $x10
%1:gprb(s32) = G_TRUNC %0(s64)
%2:gprb(s64) = G_ZEXT %1(s32)
%3:gprb(s64) = G_CONSTANT i64 7
%4:gprb(s64) = G_SHL %2, %3
$x10 = COPY %4(s64)
...
---
name: slli_uw_complex
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x10
; CHECK-LABEL: name: slli_uw_complex
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[COPY]], 2
; CHECK-NEXT: [[SLLI_UW:%[0-9]+]]:gpr = SLLI_UW [[SRLI]], 2
; CHECK-NEXT: $x10 = COPY [[SLLI_UW]]
%0:gprb(s64) = COPY $x10
%1:gprb(s64) = G_CONSTANT i64 17179869180
%2:gprb(s64) = G_AND %0, %1
$x10 = COPY %2(s64)
...

0 comments on commit cdcaef8

Please sign in to comment.