Skip to content

Commit

Permalink
[RISCV] Custom legalize bswap/bitreverse to GREVI with Zbp extension …
Browse files Browse the repository at this point in the history
…to enable them to combine with other GREVI instructions

This enables bswap/bitreverse to combine with other GREVI patterns or each other without needing to add more special cases to the DAG combine or new DAG combines.

I've also enabled the existing GREVI combine for GREVIW so that it can pick up the i32 bswap/bitreverse on RV64 after they've been type legalized to GREVIW.

Differential Revision: https://reviews.llvm.org/D92253
  • Loading branch information
topperc committed Nov 30, 2020
1 parent 589e10f commit 76d1026
Show file tree
Hide file tree
Showing 4 changed files with 473 additions and 27 deletions.
64 changes: 46 additions & 18 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -162,7 +162,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}

if (Subtarget.hasStdExtZbp()) {
setOperationAction(ISD::BITREVERSE, XLenVT, Legal);
setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
setOperationAction(ISD::BSWAP, XLenVT, Custom);

if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
Expand Down Expand Up @@ -495,6 +496,20 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Start with the maximum immediate value which is the bitwidth - 1.
unsigned Imm = VT.getSizeInBits() - 1;
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (Op.getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
}
}
}

Expand Down Expand Up @@ -1288,6 +1303,29 @@ static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
return SDValue();
}

/// Merge two directly nested nodes that share N's opcode into one.
///
/// Rewrites (OP (OP x, C2), C1) as (OP x, C1 ^ C2), where OP is whatever
/// opcode N carries. If the XOR of the two immediates is zero the stages
/// cancel each other, so x is returned unchanged.
///
/// \param N         The outer node; operand 1 is read as a constant immediate.
/// \param DAG       The DAG used to create the replacement node and constant.
/// \param Subtarget Supplies the XLen value type for the new immediate.
/// \returns The merged node, the innermost source x when the immediates cancel,
///          or an empty SDValue when operand 0 of N has a different opcode.
static SDValue combineGREVI(SDNode *N, SelectionDAG &DAG,
                            const RISCVSubtarget &Subtarget) {
  const uint64_t OuterImm = N->getConstantOperandVal(1);
  SDValue Inner = N->getOperand(0);
  const unsigned Opc = N->getOpcode();

  // Nothing to fold unless the operand is the same kind of node as N.
  if (Inner->getOpcode() != Opc)
    return SDValue();

  const uint64_t InnerImm = Inner.getConstantOperandVal(1);
  SDValue Src = Inner->getOperand(0);

  // Applying the same stage twice undoes it, so the immediates combine by XOR.
  const uint64_t MergedImm = OuterImm ^ InnerImm;
  if (MergedImm != 0) {
    SDLoc DL(N);
    return DAG.getNode(
        Opc, DL, N->getValueType(0), Src,
        DAG.getTargetConstant(MergedImm, DL, Subtarget.getXLenVT()));
  }

  // Every stage cancelled out: the result is just the original input.
  return Src;
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -1383,6 +1421,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DCI.AddToWorklist(N);
return SDValue(N, 0);
}

if (N->getOpcode() == RISCVISD::GREVIW)
if (SDValue V = combineGREVI(N, DCI.DAG, Subtarget))
return V;

break;
}
case RISCVISD::FMV_X_ANYEXTW_RV64: {
Expand Down Expand Up @@ -1415,23 +1458,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
DAG.getConstant(~SignBit, DL, MVT::i64));
}
case RISCVISD::GREVI: {
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
SDLoc DL(N);
auto GREVSrc = N->getOperand(0);
uint64_t ShAmt1 = N->getConstantOperandVal(1);
if (GREVSrc->getOpcode() != RISCVISD::GREVI)
break;
uint64_t ShAmt2 = GREVSrc.getConstantOperandVal(1);
GREVSrc = GREVSrc->getOperand(0);
uint64_t CombinedShAmt = ShAmt1 ^ ShAmt2;
if (CombinedShAmt == 0)
return GREVSrc;
return DAG.getNode(
RISCVISD::GREVI, DL, N->getValueType(0), GREVSrc,
DAG.getTargetConstant(CombinedShAmt, DL, Subtarget.getXLenVT()));
}
case RISCVISD::GREVI:
return combineGREVI(N, DCI.DAG, Subtarget);
case ISD::OR:
if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
return GREV;
Expand Down
11 changes: 2 additions & 9 deletions llvm/lib/Target/RISCV/RISCVInstrInfoB.td
Expand Up @@ -730,17 +730,10 @@ def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;
} // Predicates = [HasStdExtZbp]

let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, 8)>;
def : Pat<(rotl (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, 8)>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
def : Pat<(rotr (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
def : Pat<(rotl (riscv_grevi GPR:$rs1, (i32 24)), (i32 16)), (GREVI GPR:$rs1, 8)>;
} // Predicates = [HasStdExtZbp, IsRV32]

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZbt] in {
def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
(CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
Expand Down
169 changes: 169 additions & 0 deletions llvm/test/CodeGen/RISCV/rv32Zbp.ll
Expand Up @@ -1849,6 +1849,175 @@ define i32 @bswap_rotl_i32(i32 %a) {
ret i32 %2
}

; Applies llvm.bitreverse.i32 and then llvm.bswap.i32 to the argument.
; The RV32IB/RV32IBP check blocks expect the pair to be emitted as a single
; rev.b instruction; the RV32I check block expects the expanded shift/mask
; sequence.
define i32 @bitreverse_bswap_i32(i32 %a) {
; RV32I-LABEL: bitreverse_bswap_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi a2, a2, -256
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a3, a0, 8
; RV32I-NEXT: lui a4, 4080
; RV32I-NEXT: and a3, a3, a4
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: and a1, a0, a1
; RV32I-NEXT: slli a1, a1, 4
; RV32I-NEXT: lui a3, 986895
; RV32I-NEXT: addi a3, a3, 240
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: srli a0, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: and a1, a0, a1
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: lui a3, 838861
; RV32I-NEXT: addi a3, a3, -820
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: lui a1, 349525
; RV32I-NEXT: addi a1, a1, 1365
; RV32I-NEXT: and a1, a0, a1
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: lui a3, 699051
; RV32I-NEXT: addi a3, a3, -1366
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: srli a0, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: srli a2, a0, 24
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: slli a2, a0, 8
; RV32I-NEXT: and a2, a2, a4
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV32IB-LABEL: bitreverse_bswap_i32:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev.b a0, a0
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: bitreverse_bswap_i32:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev.b a0, a0
; RV32IBP-NEXT: ret
%1 = call i32 @llvm.bitreverse.i32(i32 %a)
%2 = call i32 @llvm.bswap.i32(i32 %1)
ret i32 %2
}

; Applies llvm.bitreverse.i64 and then llvm.bswap.i64 to the argument.
; The RV32IB/RV32IBP check blocks expect two rev.b instructions, one on a0
; and one on a1; the RV32I check block expects the expanded shift/mask
; sequence.
define i64 @bitreverse_bswap_i64(i64 %a) {
; RV32I-LABEL: bitreverse_bswap_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a3, a1, 8
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: addi t1, a2, -256
; RV32I-NEXT: and a3, a3, t1
; RV32I-NEXT: srli a4, a1, 24
; RV32I-NEXT: or a4, a3, a4
; RV32I-NEXT: slli a5, a1, 8
; RV32I-NEXT: lui a6, 4080
; RV32I-NEXT: and a5, a5, a6
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: lui a4, 61681
; RV32I-NEXT: addi a7, a4, -241
; RV32I-NEXT: and a5, a1, a7
; RV32I-NEXT: slli a5, a5, 4
; RV32I-NEXT: lui a3, 986895
; RV32I-NEXT: addi t0, a3, 240
; RV32I-NEXT: and a1, a1, t0
; RV32I-NEXT: srli a1, a1, 4
; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: lui a5, 209715
; RV32I-NEXT: addi t2, a5, 819
; RV32I-NEXT: and a4, a1, t2
; RV32I-NEXT: slli a4, a4, 2
; RV32I-NEXT: lui a3, 838861
; RV32I-NEXT: addi t3, a3, -820
; RV32I-NEXT: and a1, a1, t3
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: lui a4, 349525
; RV32I-NEXT: addi a4, a4, 1365
; RV32I-NEXT: and a2, a1, a4
; RV32I-NEXT: slli a2, a2, 1
; RV32I-NEXT: lui a5, 699051
; RV32I-NEXT: addi a5, a5, -1366
; RV32I-NEXT: and a1, a1, a5
; RV32I-NEXT: srli a1, a1, 1
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: srli a2, a0, 8
; RV32I-NEXT: and a2, a2, t1
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: slli a3, a0, 8
; RV32I-NEXT: and a3, a3, a6
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: and a2, a0, a7
; RV32I-NEXT: slli a2, a2, 4
; RV32I-NEXT: and a0, a0, t0
; RV32I-NEXT: srli a0, a0, 4
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: and a2, a0, t2
; RV32I-NEXT: slli a2, a2, 2
; RV32I-NEXT: and a0, a0, t3
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: and a2, a0, a4
; RV32I-NEXT: slli a2, a2, 1
; RV32I-NEXT: and a0, a0, a5
; RV32I-NEXT: srli a0, a0, 1
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a0, 8
; RV32I-NEXT: and a2, a2, t1
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: slli a3, a0, 8
; RV32I-NEXT: and a3, a3, a6
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a1, 8
; RV32I-NEXT: and a2, a2, t1
; RV32I-NEXT: srli a3, a1, 24
; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: slli a3, a1, 8
; RV32I-NEXT: and a3, a3, a6
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: ret
;
; RV32IB-LABEL: bitreverse_bswap_i64:
; RV32IB: # %bb.0:
; RV32IB-NEXT: rev.b a0, a0
; RV32IB-NEXT: rev.b a1, a1
; RV32IB-NEXT: ret
;
; RV32IBP-LABEL: bitreverse_bswap_i64:
; RV32IBP: # %bb.0:
; RV32IBP-NEXT: rev.b a0, a0
; RV32IBP-NEXT: rev.b a1, a1
; RV32IBP-NEXT: ret
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = call i64 @llvm.bswap.i64(i64 %1)
ret i64 %2
}

define i32 @shfl1_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: shfl1_i32:
; RV32I: # %bb.0:
Expand Down

0 comments on commit 76d1026

Please sign in to comment.