Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator &NextMBBI);
bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandCCOpToCMov(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
Expand Down Expand Up @@ -178,6 +180,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
// First try expanding to a Conditional Move rather than a branch+mv
if (expandCCOpToCMov(MBB, MBBI))
return true;

MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
Expand Down Expand Up @@ -277,6 +282,86 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
return true;
}

// Try to expand a PseudoCCMOVGPR / PseudoCCMOVGPRNoX0 into a single Xqcicm
// conditional move rather than a branch over a move.
//
//   $dst = PseudoCCMOVGPR[NoX0] $lhs, $rhs, $cc, $falsev (=$dst), $truev
//   =>
//   $dst = QC_MVcc  $falsev (=$dst), $lhs, $rhs, $truev     (when $rhs != X0)
//   $dst = QC_MVccI $falsev (=$dst), $lhs, 0,    $truev     (when $rhs == X0)
//
// Returns true (after erasing the pseudo) when the conditional move was
// emitted, and false without touching MI when this expansion does not apply.
bool RISCVExpandPseudo::expandCCOpToCMov(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;

  // Only the plain GPR conditional-move pseudos are candidates, and only when
  // the subtarget has Xqcicm.
  const unsigned PseudoOpc = MI.getOpcode();
  const bool IsCCMov = PseudoOpc == RISCV::PseudoCCMOVGPR ||
                       PseudoOpc == RISCV::PseudoCCMOVGPRNoX0;
  if (!IsCCMov || !STI->hasVendorXqcicm())
    return false;

  // FIXME: Would be wonderful to support LHS=X0, but not very easy.
  const auto LHS = MI.getOperand(1).getReg();
  if (LHS == RISCV::X0)
    return false;
  const auto FalseV = MI.getOperand(4).getReg();
  if (FalseV == RISCV::X0)
    return false;
  const auto TrueV = MI.getOperand(5).getReg();
  if (TrueV == RISCV::X0)
    return false;

  // A comparison against X0 is emitted as the immediate form with imm = 0;
  // any other RHS uses the register-register form.
  const bool RHSIsZero = MI.getOperand(2).getReg() == RISCV::X0;

  unsigned NewOpc;
  switch (static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm())) {
  case RISCVCC::COND_EQ:
    NewOpc = RHSIsZero ? RISCV::QC_MVEQI : RISCV::QC_MVEQ;
    break;
  case RISCVCC::COND_NE:
    NewOpc = RHSIsZero ? RISCV::QC_MVNEI : RISCV::QC_MVNE;
    break;
  case RISCVCC::COND_LT:
    NewOpc = RHSIsZero ? RISCV::QC_MVLTI : RISCV::QC_MVLT;
    break;
  case RISCVCC::COND_GE:
    NewOpc = RHSIsZero ? RISCV::QC_MVGEI : RISCV::QC_MVGE;
    break;
  case RISCVCC::COND_LTU:
    NewOpc = RHSIsZero ? RISCV::QC_MVLTUI : RISCV::QC_MVLTU;
    break;
  case RISCVCC::COND_GEU:
    NewOpc = RHSIsZero ? RISCV::QC_MVGEUI : RISCV::QC_MVGEU;
    break;
  default:
    llvm_unreachable("Unhandled CC");
  }

  // Both forms share the leading ($dst, $falsev, $lhs) and trailing ($truev)
  // operands; only the third source operand differs.
  DebugLoc DL = MI.getDebugLoc();
  auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
                 .addDef(MI.getOperand(0).getReg())
                 .addReg(FalseV)
                 .addReg(LHS);
  if (RHSIsZero)
    MIB.addImm(0);
  else
    MIB.addReg(MI.getOperand(2).getReg());
  MIB.addReg(TrueV);

  MI.eraseFromParent();
  return true;
}

bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Opcode) {
Expand Down
37 changes: 23 additions & 14 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,10 @@ class QCIMVCCIPat<CondCode Cond, QCIMVCCI Inst, DAGOperand InTyImm>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), InTyImm:$imm, Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm:$imm, GPRNoX0:$rs3)>;

// Pattern selecting the immediate-form conditional move `Inst` for a
// `riscv_selectcc` under condition `Cond` whose second comparison operand is
// the constant 0. The emitted instruction receives $rd, $rs1, a literal 0
// immediate, and $rs3.
class QCIMVCCIZeroPat<CondCode Cond, QCIMVCCI Inst>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;

// Pattern selecting `Inst` for a `riscv_selectcc` under condition `Cond` that
// compares $rd against a simm5 immediate and chooses between $rs2 and $rs3.
// The emitted instruction receives $rd, $imm, $rs2, and $rs3 in that order.
class QCISELECTCCIPat<CondCode Cond, QCISELECTCCI Inst>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rd), simm5:$imm, Cond, (i32 GPRNoX0:$rs2), (i32 GPRNoX0:$rs3))),
(Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, GPRNoX0:$rs3)>;
Expand Down Expand Up @@ -1538,27 +1542,32 @@ def: Pat<(i32 (ctlz (not (i32 GPR:$rs1)))), (QC_CLO GPR:$rs1)>;
let Predicates = [HasVendorXqciint, IsRV32] in
def : Pat<(riscv_mileaveret_glue), (QC_C_MILEAVERET)>;

let Predicates = [HasVendorXqcicm, IsRV32] in {
// (SELECT X, Y, Z) is canonicalised to `(riscv_selectcc x, 0, NE, y, z)`.
// This exists to prioritise over the `Select_GPR_Using_CC_GPR` pattern.
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETNE, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVNEI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETEQ, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVEQI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;

let Predicates = [HasVendorXqcicm, NoShortForwardBranchOpt, IsRV32] in {
def : QCIMVCCPat<SETEQ, QC_MVEQ>;
def : QCIMVCCPat<SETNE, QC_MVNE>;
def : QCIMVCCPat<SETLT, QC_MVLT>;
def : QCIMVCCPat<SETULT, QC_MVLTU>;
def : QCIMVCCPat<SETGE, QC_MVGE>;
def : QCIMVCCPat<SETUGE, QC_MVGEU>;

def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5>;
def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5>;
def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5>;
def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5>;
def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5>;
def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5>;
// These exist to prioritise over the `Select_GPR_Using_CC_GPR` pattern for X0.
def : QCIMVCCIZeroPat<SETEQ, QC_MVEQI>;
def : QCIMVCCIZeroPat<SETNE, QC_MVNEI>;
def : QCIMVCCIZeroPat<SETLT, QC_MVLTI>;
def : QCIMVCCIZeroPat<SETULT, QC_MVLTUI>;
def : QCIMVCCIZeroPat<SETGE, QC_MVGEI>;
def : QCIMVCCIZeroPat<SETUGE, QC_MVGEUI>;
}

// Immediate-compare conditional-move patterns for Xqcicm on RV32. Unlike the
// zero-immediate patterns, this predicate list carries no
// NoShortForwardBranchOpt requirement, so these apply whether or not SFB is
// enabled.
let Predicates = [HasVendorXqcicm, IsRV32] in {
// These all use *imm5nonzero because we want to use PseudoCCMOVGPR with X0 when SFB is enabled.
// When SFB is not enabled, the `QCIMVCCIZeroPat`s above will be used if RHS=0.
def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5nonzero>;
def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5nonzero>;
def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5nonzero>;
// The unsigned comparisons take an unsigned 5-bit immediate.
def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5nonzero>;
def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5nonzero>;
def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5nonzero>;
}

let Predicates = [HasVendorXqcicli, IsRV32] in {
Expand Down
109 changes: 109 additions & 0 deletions llvm/test/CodeGen/RISCV/cmov-branch-opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
; RUN: | FileCheck -check-prefixes=SHORT_FORWARD,SFB-NOZICOND,SFB-NOZICOND-C %s
; RUN: llc -mtriple=riscv64 -mattr=+short-forward-branch-opt,+zicond -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=SHORT_FORWARD,SFB-ZICOND %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI

; The conditional move optimization in sifive-p450 requires that only a
; single c.mv instruction appears in the branch shadow.
Expand Down Expand Up @@ -42,6 +44,14 @@ define signext i32 @test1(i32 signext %x, i32 signext %y, i32 signext %z) {
; SHORT_FORWARD-NEXT: xor a0, a0, a1
; SHORT_FORWARD-NEXT: .LBB0_2:
; SHORT_FORWARD-NEXT: ret
;
; RV32IXQCI-LABEL: test1:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: bnez a2, .LBB0_2
; RV32IXQCI-NEXT: # %bb.1:
; RV32IXQCI-NEXT: xor a0, a0, a1
; RV32IXQCI-NEXT: .LBB0_2:
; RV32IXQCI-NEXT: ret
%c = icmp eq i32 %z, 0
%a = xor i32 %x, %y
%b = select i1 %c, i32 %a, i32 %x
Expand Down Expand Up @@ -73,6 +83,14 @@ define signext i32 @test2(i32 signext %x, i32 signext %y, i32 signext %z) {
; SHORT_FORWARD-NEXT: xor a0, a0, a1
; SHORT_FORWARD-NEXT: .LBB1_2:
; SHORT_FORWARD-NEXT: ret
;
; RV32IXQCI-LABEL: test2:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: beqz a2, .LBB1_2
; RV32IXQCI-NEXT: # %bb.1:
; RV32IXQCI-NEXT: xor a0, a0, a1
; RV32IXQCI-NEXT: .LBB1_2:
; RV32IXQCI-NEXT: ret
%c = icmp eq i32 %z, 0
%a = xor i32 %x, %y
%b = select i1 %c, i32 %x, i32 %a
Expand Down Expand Up @@ -120,6 +138,19 @@ define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 si
; SHORT_FORWARD-NEXT: .LBB2_4:
; SHORT_FORWARD-NEXT: addw a0, a0, a2
; SHORT_FORWARD-NEXT: ret
;
; RV32IXQCI-LABEL: test3:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: beqz a4, .LBB2_2
; RV32IXQCI-NEXT: # %bb.1:
; RV32IXQCI-NEXT: xor a0, a0, a1
; RV32IXQCI-NEXT: .LBB2_2:
; RV32IXQCI-NEXT: beqz a4, .LBB2_4
; RV32IXQCI-NEXT: # %bb.3:
; RV32IXQCI-NEXT: xor a2, a2, a3
; RV32IXQCI-NEXT: .LBB2_4:
; RV32IXQCI-NEXT: add a0, a0, a2
; RV32IXQCI-NEXT: ret
%c = icmp eq i32 %z, 0
%a = xor i32 %v, %w
%b = select i1 %c, i32 %v, i32 %a
Expand Down Expand Up @@ -167,6 +198,12 @@ define signext i32 @test4(i32 signext %x, i32 signext %y, i32 signext %z) {
; SFB-ZICOND-NEXT: li a0, 3
; SFB-ZICOND-NEXT: czero.nez a0, a0, a2
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: test4:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: li a0, 0
; RV32IXQCI-NEXT: qc.lieqi a0, a2, 0, 3
; RV32IXQCI-NEXT: ret
%c = icmp eq i32 %z, 0
%a = select i1 %c, i32 3, i32 0
ret i32 %a
Expand Down Expand Up @@ -199,6 +236,15 @@ define i16 @select_xor_1(i16 %A, i8 %cond) {
; SHORT_FORWARD-NEXT: xori a0, a0, 43
; SHORT_FORWARD-NEXT: .LBB4_2: # %entry
; SHORT_FORWARD-NEXT: ret
;
; RV32IXQCI-LABEL: select_xor_1:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a1, a1, 1
; RV32IXQCI-NEXT: beqz a1, .LBB4_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: xori a0, a0, 43
; RV32IXQCI-NEXT: .LBB4_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
Expand Down Expand Up @@ -236,6 +282,15 @@ define i16 @select_xor_1b(i16 %A, i8 %cond) {
; SHORT_FORWARD-NEXT: xori a0, a0, 43
; SHORT_FORWARD-NEXT: .LBB5_2: # %entry
; SHORT_FORWARD-NEXT: ret
;
; RV32IXQCI-LABEL: select_xor_1b:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a1, a1, 1
; RV32IXQCI-NEXT: beqz a1, .LBB5_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: xori a0, a0, 43
; RV32IXQCI-NEXT: .LBB5_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp ne i8 %and, 1
Expand Down Expand Up @@ -289,6 +344,15 @@ define i32 @select_xor_2(i32 %A, i32 %B, i8 %cond) {
; SFB-ZICOND-NEXT: xor a0, a1, a0
; SFB-ZICOND-NEXT: .LBB6_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_xor_2:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB6_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: xor a0, a0, a1
; RV32IXQCI-NEXT: .LBB6_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
Expand Down Expand Up @@ -344,6 +408,15 @@ define i32 @select_xor_2b(i32 %A, i32 %B, i8 %cond) {
; SFB-ZICOND-NEXT: xor a0, a1, a0
; SFB-ZICOND-NEXT: .LBB7_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_xor_2b:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB7_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: xor a0, a0, a1
; RV32IXQCI-NEXT: .LBB7_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp ne i8 %and, 1
Expand Down Expand Up @@ -397,6 +470,15 @@ define i32 @select_or(i32 %A, i32 %B, i8 %cond) {
; SFB-ZICOND-NEXT: or a0, a1, a0
; SFB-ZICOND-NEXT: .LBB8_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_or:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB8_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: or a0, a0, a1
; RV32IXQCI-NEXT: .LBB8_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp eq i8 %and, 0
Expand Down Expand Up @@ -452,6 +534,15 @@ define i32 @select_or_b(i32 %A, i32 %B, i8 %cond) {
; SFB-ZICOND-NEXT: or a0, a1, a0
; SFB-ZICOND-NEXT: .LBB9_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_or_b:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB9_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: or a0, a0, a1
; RV32IXQCI-NEXT: .LBB9_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i8 %cond, 1
%cmp10 = icmp ne i8 %and, 1
Expand Down Expand Up @@ -505,6 +596,15 @@ define i32 @select_or_1(i32 %A, i32 %B, i32 %cond) {
; SFB-ZICOND-NEXT: or a0, a1, a0
; SFB-ZICOND-NEXT: .LBB10_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_or_1:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB10_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: or a0, a0, a1
; RV32IXQCI-NEXT: .LBB10_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i32 %cond, 1
%cmp10 = icmp eq i32 %and, 0
Expand Down Expand Up @@ -560,6 +660,15 @@ define i32 @select_or_1b(i32 %A, i32 %B, i32 %cond) {
; SFB-ZICOND-NEXT: or a0, a1, a0
; SFB-ZICOND-NEXT: .LBB11_2: # %entry
; SFB-ZICOND-NEXT: ret
;
; RV32IXQCI-LABEL: select_or_1b:
; RV32IXQCI: # %bb.0: # %entry
; RV32IXQCI-NEXT: andi a2, a2, 1
; RV32IXQCI-NEXT: beqz a2, .LBB11_2
; RV32IXQCI-NEXT: # %bb.1: # %entry
; RV32IXQCI-NEXT: or a0, a0, a1
; RV32IXQCI-NEXT: .LBB11_2: # %entry
; RV32IXQCI-NEXT: ret
entry:
%and = and i32 %cond, 1
%cmp10 = icmp ne i32 %and, 1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/select-bare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI

define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
Expand Down
Loading