[RISCV] Xqci with Short Forward Branches #161407
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Sam Elliott (lenary)

Changes

This PR implements support for the combination of Xqci and the Short Forward Branch optimisation.

In particular, we want to prioritise `Branch+ALU` (short forward branches) over the equivalent `ALU+CMov`, when the compared values are both registers, and the selected values come from registers (as this is what `PseudoCCMOVGPR` supports).

However, when expanding `PseudoCCMOVGPR` (i.e., `Branch+MV`), we instead want to expand it to a conditional move (for code size reasons), so I have added `RISCVExpandPseudo::expandCCOpToCMov` to try to do so. This mostly works, except if `PseudoCCMOVGPR` is comparing against zero and gets commuted - as can be seen in one example in `foo` in `select-cc.ll`.

This PR:
- updates the attributes used for the XQCI RUN lines for the select tests.
- modifies the CodeGen patterns and predicates to prioritise selecting the SFB Pseudo.
- adds CodeGen patterns for MVLTI/MVLTUI/MVGEI/MVGEUI with imm=zero, to prioritise over the equivalent `Select_GPR_Using_CC_GPR` patterns for rhs=X0.
- adds a hook to attempt to turn the predicated-mov Pseudo back into a Conditional Move from Xqcicm (which matches the pseudo in terms of tied register operands).
Patch is 46.54 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/161407.diff

10 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index d4d9e5430d390..0cda1735b72b1 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -46,6 +46,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator &NextMBBI);
bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandCCOpToCMov(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
@@ -178,6 +180,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
+ // First try expanding to a Conditional Move rather than a branch+mv
+ if (expandCCOpToCMov(MBB, MBBI))
+ return true;
MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
@@ -277,6 +282,86 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
return true;
}
+bool RISCVExpandPseudo::expandCCOpToCMov(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ if (MI.getOpcode() != RISCV::PseudoCCMOVGPR &&
+ MI.getOpcode() != RISCV::PseudoCCMOVGPRNoX0)
+ return false;
+
+ if (!STI->hasVendorXqcicm())
+ return false;
+
+ // FIXME: Would be wonderful to support LHS=X0, but not very easy.
+ if (MI.getOperand(1).getReg() == RISCV::X0 ||
+ MI.getOperand(4).getReg() == RISCV::X0 ||
+ MI.getOperand(5).getReg() == RISCV::X0)
+ return false;
+
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+
+ unsigned CMovOpcode, CMovIOpcode;
+ switch (CC) {
+ default:
+ llvm_unreachable("Unhandled CC");
+ case RISCVCC::COND_EQ:
+ CMovOpcode = RISCV::QC_MVEQ;
+ CMovIOpcode = RISCV::QC_MVEQI;
+ break;
+ case RISCVCC::COND_NE:
+ CMovOpcode = RISCV::QC_MVNE;
+ CMovIOpcode = RISCV::QC_MVNEI;
+ break;
+ case RISCVCC::COND_LT:
+ CMovOpcode = RISCV::QC_MVLT;
+ CMovIOpcode = RISCV::QC_MVNEI;
+ break;
+ case RISCVCC::COND_GE:
+ CMovOpcode = RISCV::QC_MVGE;
+ CMovIOpcode = RISCV::QC_MVGEI;
+ break;
+ case RISCVCC::COND_LTU:
+ CMovOpcode = RISCV::QC_MVLTU;
+ CMovIOpcode = RISCV::QC_MVLTUI;
+ break;
+ case RISCVCC::COND_GEU:
+ CMovOpcode = RISCV::QC_MVGEU;
+ CMovIOpcode = RISCV::QC_MVGEUI;
+ break;
+ }
+
+ if (MI.getOperand(2).getReg() == RISCV::X0) {
+ // $dst = PseudoCCMOVGPR $lhs, X0, $cc, $falsev (=$dst), $truev
+ // $dst = PseudoCCMOVGPRNoX0 $lhs, X0, $cc, $falsev (=$dst), $truev
+ // =>
+ // $dst = QC_MVccI $falsev (=$dst), $lhs, 0, $truev
+ BuildMI(MBB, MBBI, DL, TII->get(CMovIOpcode))
+ .addDef(MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI.getOperand(5).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // $dst = PseudoCCMOVGPR $lhs, $rhs, $cc, $falsev (=$dst), $truev
+ // $dst = PseudoCCMOVGPRNoX0 $lhs, $rhs, $cc, $falsev (=$dst), $truev
+ // =>
+ // $dst = QC_MVcc $falsev (=$dst), $lhs, $rhs, $truev
+ BuildMI(MBB, MBBI, DL, TII->get(CMovOpcode))
+ .addDef(MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg())
+ .addReg(MI.getOperand(5).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Opcode) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index ff4a0406799b1..3b64a224a9d09 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1350,6 +1350,10 @@ class QCIMVCCIPat<CondCode Cond, QCIMVCCI Inst, DAGOperand InTyImm>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), InTyImm:$imm, Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm:$imm, GPRNoX0:$rs3)>;
+class QCIMVCCIZeroPat<CondCode Cond, QCIMVCCI Inst>
+ : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
+ (Inst GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
+
class QCISELECTCCIPat<CondCode Cond, QCISELECTCCI Inst>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rd), simm5:$imm, Cond, (i32 GPRNoX0:$rs2), (i32 GPRNoX0:$rs3))),
(Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, GPRNoX0:$rs3)>;
@@ -1538,14 +1542,7 @@ def: Pat<(i32 (ctlz (not (i32 GPR:$rs1)))), (QC_CLO GPR:$rs1)>;
let Predicates = [HasVendorXqciint, IsRV32] in
def : Pat<(riscv_mileaveret_glue), (QC_C_MILEAVERET)>;
-let Predicates = [HasVendorXqcicm, IsRV32] in {
-// (SELECT X, Y, Z) is canonicalised to `(riscv_selectcc x, 0, NE, y, z)`.
-// This exists to prioritise over the `Select_GPR_Using_CC_GPR` pattern.
-def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETNE, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
- (QC_MVNEI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
-def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETEQ, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
- (QC_MVEQI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
-
+let Predicates = [HasVendorXqcicm, NoShortForwardBranchOpt, IsRV32] in {
def : QCIMVCCPat<SETEQ, QC_MVEQ>;
def : QCIMVCCPat<SETNE, QC_MVNE>;
def : QCIMVCCPat<SETLT, QC_MVLT>;
@@ -1553,12 +1550,24 @@ def : QCIMVCCPat<SETULT, QC_MVLTU>;
def : QCIMVCCPat<SETGE, QC_MVGE>;
def : QCIMVCCPat<SETUGE, QC_MVGEU>;
-def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5>;
-def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5>;
-def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5>;
-def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5>;
-def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5>;
-def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5>;
+// These exist to prioritise over the `Select_GPR_Using_CC_GPR` pattern for X0.
+def : QCIMVCCIZeroPat<SETEQ, QC_MVEQI>;
+def : QCIMVCCIZeroPat<SETNE, QC_MVNEI>;
+def : QCIMVCCIZeroPat<SETLT, QC_MVLTI>;
+def : QCIMVCCIZeroPat<SETULT, QC_MVLTUI>;
+def : QCIMVCCIZeroPat<SETGE, QC_MVGEI>;
+def : QCIMVCCIZeroPat<SETUGE, QC_MVGEUI>;
+}
+
+let Predicates = [HasVendorXqcicm, IsRV32] in {
+// These all use *imm5nonzero because we want to use PseudoCCMOVGPR with X0 when SFB is enabled.
+// When SFB is not enabled, the `QCIMVCCIZeroPat`s above will be used if RHS=0.
+def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5nonzero>;
+def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5nonzero>;
+def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5nonzero>;
+def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5nonzero>;
+def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5nonzero>;
+def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5nonzero>;
}
let Predicates = [HasVendorXqcicli, IsRV32] in {
diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll
index 44028a7651b95..550eb94724ff2 100644
--- a/llvm/test/CodeGen/RISCV/select-bare.ll
+++ b/llvm/test/CodeGen/RISCV/select-bare.ll
@@ -3,7 +3,7 @@
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
index b57f625cb867f..2fc64b5681bf8 100644
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32I %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64I %s
@@ -88,39 +88,38 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV32IXQCI-LABEL: foo:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: lw a2, 0(a1)
-; RV32IXQCI-NEXT: lw a4, 0(a1)
-; RV32IXQCI-NEXT: lw t5, 0(a1)
-; RV32IXQCI-NEXT: lw t4, 0(a1)
-; RV32IXQCI-NEXT: lw t3, 0(a1)
-; RV32IXQCI-NEXT: lw t2, 0(a1)
-; RV32IXQCI-NEXT: lw t0, 0(a1)
-; RV32IXQCI-NEXT: lw a7, 0(a1)
-; RV32IXQCI-NEXT: lw a6, 0(a1)
; RV32IXQCI-NEXT: lw a3, 0(a1)
-; RV32IXQCI-NEXT: lw t1, 0(a1)
+; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: lw a5, 0(a1)
-; RV32IXQCI-NEXT: bltz t1, .LBB0_2
+; RV32IXQCI-NEXT: qc.mvne a0, a0, a2, a2
+; RV32IXQCI-NEXT: qc.mveq a0, a0, a3, a3
+; RV32IXQCI-NEXT: lw a2, 0(a1)
+; RV32IXQCI-NEXT: qc.mvgeu a0, a4, a0, a4
+; RV32IXQCI-NEXT: lw a3, 0(a1)
+; RV32IXQCI-NEXT: qc.mvltu a0, a0, a5, a5
+; RV32IXQCI-NEXT: lw a4, 0(a1)
+; RV32IXQCI-NEXT: qc.mvgeu a0, a0, a2, a2
+; RV32IXQCI-NEXT: lw a2, 0(a1)
+; RV32IXQCI-NEXT: qc.mvltu a0, a3, a0, a3
+; RV32IXQCI-NEXT: lw a3, 0(a1)
+; RV32IXQCI-NEXT: qc.mvge a0, a4, a0, a4
+; RV32IXQCI-NEXT: lw a4, 0(a1)
+; RV32IXQCI-NEXT: qc.mvlt a0, a0, a2, a2
+; RV32IXQCI-NEXT: lw a2, 0(a1)
+; RV32IXQCI-NEXT: qc.mvge a0, a0, a3, a3
+; RV32IXQCI-NEXT: lw a3, 0(a1)
+; RV32IXQCI-NEXT: qc.mvlt a0, a4, a0, a4
+; RV32IXQCI-NEXT: lw a4, 0(a1)
+; RV32IXQCI-NEXT: lw a1, 0(a1)
+; RV32IXQCI-NEXT: blez a2, .LBB0_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: li a5, 0
-; RV32IXQCI-NEXT: qc.mveq a2, a0, a2, a0
-; RV32IXQCI-NEXT: qc.mvne a4, a2, a4, a2
-; RV32IXQCI-NEXT: qc.mvltu t5, t5, a4, a4
-; RV32IXQCI-NEXT: qc.mvgeu t4, t5, t4, t5
-; RV32IXQCI-NEXT: qc.mvltu t3, t4, t3, t4
-; RV32IXQCI-NEXT: qc.mvgeu t2, t2, t3, t3
-; RV32IXQCI-NEXT: qc.mvlt t0, t0, t2, t2
-; RV32IXQCI-NEXT: qc.mvge a7, t0, a7, t0
-; RV32IXQCI-NEXT: qc.mvlt a6, a7, a6, a7
-; RV32IXQCI-NEXT: qc.mvge a3, a3, a6, a6
-; RV32IXQCI-NEXT: qc.mvlt a3, a5, t1, t1
-; RV32IXQCI-NEXT: mv a5, a3
+; RV32IXQCI-NEXT: mv a0, a2
; RV32IXQCI-NEXT: .LBB0_2:
-; RV32IXQCI-NEXT: lw a2, 0(a1)
-; RV32IXQCI-NEXT: lw a0, 0(a1)
-; RV32IXQCI-NEXT: li a1, 1024
-; RV32IXQCI-NEXT: qc.mvlt a2, a1, a2, a5
-; RV32IXQCI-NEXT: li a1, 2046
-; RV32IXQCI-NEXT: qc.mvltu a0, a1, t1, a2
+; RV32IXQCI-NEXT: qc.mvnei a0, a2, 0, a3
+; RV32IXQCI-NEXT: li a3, 1024
+; RV32IXQCI-NEXT: qc.mvge a0, a3, a4, a4
+; RV32IXQCI-NEXT: li a3, 2046
+; RV32IXQCI-NEXT: qc.mvgeu a0, a3, a2, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: foo:
diff --git a/llvm/test/CodeGen/RISCV/select-cond.ll b/llvm/test/CodeGen/RISCV/select-cond.ll
index 3ca0f46e8c02f..a3c48737edc3c 100644
--- a/llvm/test/CodeGen/RISCV/select-cond.ll
+++ b/llvm/test/CodeGen/RISCV/select-cond.ll
@@ -7,7 +7,7 @@
; RUN: | FileCheck %s --check-prefixes=RV32-XQCICM
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32-XQCICS
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV64
diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll
index 65d10bb823418..dfac6e1630d25 100644
--- a/llvm/test/CodeGen/RISCV/select-const.ll
+++ b/llvm/test/CodeGen/RISCV/select-const.ll
@@ -5,7 +5,7 @@
; RUN: | FileCheck -check-prefixes=RV32,RV32IF %s
; RUN: llc -mtriple=riscv32 -mattr=+zicond -target-abi=ilp32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32,RV32ZICOND %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
; RUN: llc -mtriple=riscv64 -target-abi=lp64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64,RV64I %s
@@ -579,9 +579,9 @@ define i32 @select_slt_zero_constant1_constant2(i32 signext %x) {
;
; RV32IXQCI-LABEL: select_slt_zero_constant1_constant2:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: srai a0, a0, 31
-; RV32IXQCI-NEXT: andi a0, a0, 10
-; RV32IXQCI-NEXT: addi a0, a0, -3
+; RV32IXQCI-NEXT: li a1, -3
+; RV32IXQCI-NEXT: qc.lilti a1, a0, 0, 7
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64-LABEL: select_slt_zero_constant1_constant2:
@@ -605,9 +605,9 @@ define i32 @select_sgt_negative_one_constant1_constant2(i32 signext %x) {
;
; RV32IXQCI-LABEL: select_sgt_negative_one_constant1_constant2:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: srai a0, a0, 31
-; RV32IXQCI-NEXT: andi a0, a0, -10
-; RV32IXQCI-NEXT: addi a0, a0, 7
+; RV32IXQCI-NEXT: li a1, -3
+; RV32IXQCI-NEXT: qc.ligei a1, a0, 0, 7
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64-LABEL: select_sgt_negative_one_constant1_constant2:
@@ -653,12 +653,10 @@ define i32 @select_nonnegative_lui_addi(i32 signext %x) {
;
; RV32IXQCI-LABEL: select_nonnegative_lui_addi:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: mv a1, a0
-; RV32IXQCI-NEXT: lui a0, 4
-; RV32IXQCI-NEXT: bgez a1, .LBB21_2
-; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: li a0, 25
-; RV32IXQCI-NEXT: .LBB21_2:
+; RV32IXQCI-NEXT: lui a2, 4
+; RV32IXQCI-NEXT: li a1, 25
+; RV32IXQCI-NEXT: qc.mvgei a1, a0, 0, a2
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: select_nonnegative_lui_addi:
@@ -726,12 +724,10 @@ define i32 @select_nonnegative_lui_addi_swapped(i32 signext %x) {
;
; RV32IXQCI-LABEL: select_nonnegative_lui_addi_swapped:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bgez a0, .LBB22_2
-; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 4
-; RV32IXQCI-NEXT: ret
-; RV32IXQCI-NEXT: .LBB22_2:
-; RV32IXQCI-NEXT: li a0, 25
+; RV32IXQCI-NEXT: li a2, 25
+; RV32IXQCI-NEXT: lui a1, 4
+; RV32IXQCI-NEXT: qc.mvgei a1, a0, 0, a2
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: select_nonnegative_lui_addi_swapped:
@@ -801,13 +797,13 @@ define i32 @diff_shl_addi(i32 signext %x) {
;
; RV32IXQCI-LABEL: diff_shl_addi:
; RV32IXQCI: # %bb.0:
+; RV32IXQCI-NEXT: lui a2, 4
+; RV32IXQCI-NEXT: li a1, 25
; RV32IXQCI-NEXT: bgez a0, .LBB23_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 4
-; RV32IXQCI-NEXT: addi a0, a0, 25
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: addi a1, a2, 25
; RV32IXQCI-NEXT: .LBB23_2:
-; RV32IXQCI-NEXT: li a0, 25
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: diff_shl_addi:
@@ -876,13 +872,13 @@ define i32 @diff_shl_addi2(i32 signext %x) {
;
; RV32IXQCI-LABEL: diff_shl_addi2:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bgez a0, .LBB24_2
+; RV32IXQCI-NEXT: lui a2, 4
+; RV32IXQCI-NEXT: li a1, 25
+; RV32IXQCI-NEXT: bltz a0, .LBB24_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: li a0, 25
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: addi a1, a2, 25
; RV32IXQCI-NEXT: .LBB24_2:
-; RV32IXQCI-NEXT: lui a0, 4
-; RV32IXQCI-NEXT: addi a0, a0, 25
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: diff_shl_addi2:
@@ -929,9 +925,10 @@ define i32 @diff_pow2_24_16(i32 signext %x) {
;
; RV32IXQCI-LABEL: diff_pow2_24_16:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: srai a0, a0, 31
-; RV32IXQCI-NEXT: andi a0, a0, -8
-; RV32IXQCI-NEXT: addi a0, a0, 24
+; RV32IXQCI-NEXT: li a2, 24
+; RV32IXQCI-NEXT: li a1, 16
+; RV32IXQCI-NEXT: qc.mvgei a1, a0, 0, a2
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64-LABEL: diff_pow2_24_16:
@@ -955,9 +952,10 @@ define i32 @diff_pow2_16_24(i32 signext %x) {
;
; RV32IXQCI-LABEL: diff_pow2_16_24:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: srli a0, a0, 28
-; RV32IXQCI-NEXT: andi a0, a0, 8
-; RV32IXQCI-NEXT: addi a0, a0, 16
+; RV32IXQCI-NEXT: li a2, 16
+; RV32IXQCI-NEXT: li a1, 24
+; RV32IXQCI-NEXT: qc.mvgei a1, a0, 0, a2
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64-LABEL: diff_pow2_16_24:
@@ -1008,14 +1006,14 @@ define i32 @zext_or_constant(i32 signext %x) {
;
; RV32IXQCI-LABEL: zext_or_constant:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bgez a0, .LBB27_2
+; RV32IXQCI-NEXT: srli a2, a0, 31
+; RV32IXQCI-NEXT: lui a1, 140
+; RV32IXQCI-NEXT: addi a1, a1, 417
+; RV32IXQCI-NEXT: bltz a0, .LBB27_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 140
-; RV32IXQCI-NEXT: addi a0, a0, 417
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: xori a1, a2, 1
; RV32IXQCI-NEXT: .LBB27_2:
-; RV32IXQCI-NEXT: srli a0, a0, 31
-; RV32IXQCI-NEXT: xori a0, a0, 1
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: zext_or_constant:
@@ -1095,14 +1093,14 @@ define i32 @zext_or_constant2(i32 signext %x) {
;
; RV32IXQCI-LABEL: zext_or_constant2:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bltz a0, .LBB28_2
+; RV32IXQCI-NEXT: srli a2, a0, 31
+; RV32IXQCI-NEXT: lui a1, 140
+; RV32IXQCI-NEXT: addi a1, a1, 417
+; RV32IXQCI-NEXT: bgez a0, .LBB28_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 140
-; RV32IXQCI-NEXT: addi a0, a0, 417
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: xori a1, a2, 1
; RV32IXQCI-NEXT: .LBB28_2:
-; RV32IXQCI-NEXT: srli a0, a0, 31
-; RV32IXQCI-NEXT: xori a0, a0, 1
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: zext_or_constant2:
@@ -1183,14 +1181,14 @@ define i32 @sext_or_constant(i32 signext %x) {
;
; RV32IXQCI-LABEL: sext_or_constant:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bgez a0, .LBB29_2
+; RV32IXQCI-NEXT: srli a2, a0, 31
+; RV32IXQCI-NEXT: lui a1, 140
+; RV32IXQCI-NEXT: addi a1, a1, 417
+; RV32IXQCI-NEXT: bltz a0, .LBB29_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 140
-; RV32IXQCI-NEXT: addi a0, a0, 417
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: addi a1, a2, -1
; RV32IXQCI-NEXT: .LBB29_2:
-; RV32IXQCI-NEXT: srli a0, a0, 31
-; RV32IXQCI-NEXT: addi a0, a0, -1
+; RV32IXQCI-NEXT: mv a0, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: sext_or_constant:
@@ -1271,14 +1269,14 @@ define i32 @sext_or_constant2(i32 signext %x) {
;
; RV32IXQCI-LABEL: sext_or_constant2:
; RV32IXQCI: # %bb.0:
-; RV32IXQCI-NEXT: bltz a0, .LBB30_2
+; RV32IXQCI-NEXT: srli a2, a0, 31
+; RV32IXQCI-NEXT: lui a1, 140
+; RV32IXQCI-NEXT: addi a1, a1, 417
+; RV32IXQCI-NEXT: bgez a0, .LBB30_2
; RV32IXQCI-NEXT: # %bb.1:
-; RV32IXQCI-NEXT: lui a0, 140
-; RV32IXQCI-NEXT: addi a0, a0, 417
-; RV32IXQCI-NEXT: ret
+; RV32IXQCI-NEXT: addi a1, a2, -1
; RV32IXQCI-NEXT: .LBB30_2:
-; RV32IXQCI-NEXT: ...
[truncated]
I suggest looking at each commit individually, to more clearly differentiate the changes that come from updating the RUN lines from those that make the actual CodeGen change.
; RV32IXQCI-NEXT: lw t1, 0(a1)
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: lw a5, 0(a1)
; RV32IXQCI-NEXT: bltz t1, .LBB0_2
Why was there a branch here before?
Broadly, because `t111: i32 = RISCVISD::SELECT_CC t47, Constant:i32<0>, setge:ch, t112, t51` was getting selected into `t111: i32 = Select_GPR_Using_CC_GPR t47, Register:i32 $x0, TargetConstant:i32<3> /*==RISCVCC::COND_GE*/, t112, t51`.

This is due to the pattern here (llvm/lib/Target/RISCV/RISCVInstrInfo.td, lines 1666 to 1669 in 5b40491):

def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev),
          valty:$falsev),
          (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0),
          (IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;

which has Complexity=8.
There was only the generic `QC_MVGE` pattern for SETGE, which has complexity=3. We did have some Complexity=8 patterns with 0 literals (for `QC_MVccI`), but only for SETEQ/SETNE:
(llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td, lines 1544 to 1547 in 5b40491)

def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETNE, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
          (QC_MVNEI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETEQ, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
          (QC_MVEQI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
This PR wrapped up the latter patterns into `QCIMVCCIZeroPat`, and added variants for all the condition codes supported.

These two patterns being almost identical and having the same complexity is not a problem, because `Select_GPR_Using_CC_GPR` has a custom inserter, which adds to the cost in `getResultPatternCost` as used by the `PatternSortingPredicate`, so it will always come after a single-instruction result with no custom inserter, as is present in `QCIMVCCIZeroPat`.

I've been trying to work through internally to make sure that all our patterns are totally ordered with `PatternSortingPredicate` (when the patterns are enabled). We have one last place where there's an overlap and that sorting predicate is not able to differentiate the patterns, which we know we need to solve.
    break;
  case RISCVCC::COND_LT:
    CMovOpcode = RISCV::QC_MVLT;
    CMovIOpcode = RISCV::QC_MVNEI;
Should this be MVLTI?
Yes, which shows my testing isn't good enough yet. Will look at adding additional relevant RUN lines.
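The fix is presumably just the matching immediate-form opcode in that switch arm; a sketch of what the corrected case might look like (assuming the fix is exactly this one-line change, which is not yet in the posted diff):

```cpp
  case RISCVCC::COND_LT:
    CMovOpcode = RISCV::QC_MVLT;
    CMovIOpcode = RISCV::QC_MVLTI; // the posted diff had RISCV::QC_MVNEI here
    break;
```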
LGTM
LGTM!
This change implements support for the combination of Xqci and the Short Forward Branch optimisation.

In particular, we want to prioritise `Branch+ALU` (short forward branches) over the equivalent `ALU+CMov`, when the compared values are both registers, and the selected values come from registers (as this is what `PseudoCCMOVGPR` supports).

However, when expanding `PseudoCCMOVGPR` (i.e., `Branch+MV`), we instead want to expand it to a conditional move (for code size reasons), so I have added `RISCVExpandPseudo::expandCCOpToCMov` to try to do so. This mostly works, except if `PseudoCCMOVGPR` is comparing against zero and gets commuted - as can be seen in one example in `foo` in `select-cc.ll`.

This change:
- updates the attributes used for the XQCI RUN lines for the select tests.
- modifies the CodeGen patterns and predicates to prioritise selecting the SFB Pseudo.
- adds CodeGen patterns for MVLTI/MVLTUI/MVGEI/MVGEUI with imm=zero, to prioritise over the equivalent `Select_GPR_Using_CC_GPR` patterns for rhs=X0.
- adds a hook to attempt to turn the predicated-mov Pseudo back into a Conditional Move from Xqcicm (which matches the pseudo in terms of tied register operands).
This PR implements support for the combination of Xqci and the Short Forward Branch optimisation.

In particular, we want to prioritise `Branch+ALU` (short forward branches) over the equivalent `ALU+CMov`, when the compared values are both registers, and the selected values come from registers (as this is what `PseudoCCMOVGPR` supports).

However, when expanding `PseudoCCMOVGPR` (i.e., `Branch+MV`), we instead want to expand it to a conditional move (for code size reasons), so I have added `RISCVExpandPseudo::expandCCOpToCMov` to try to do so. This mostly works, except if `PseudoCCMOVGPR` is comparing against zero and gets commuted - as can be seen in one example in `foo` in `select-cc.ll`.

This PR:
- updates the attributes used for the XQCI RUN lines for the select tests.
- modifies the CodeGen patterns and predicates to prioritise selecting the SFB Pseudo.
- adds CodeGen patterns for MVLTI/MVLTUI/MVGEI/MVGEUI with imm=zero, to prioritise over the equivalent `Select_GPR_Using_CC_GPR` patterns for rhs=X0.
- adds a hook to attempt to turn the predicated-mov Pseudo back into a Conditional Move from Xqcicm (which matches the pseudo in terms of tied register operands).
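As a rough illustration of the expansion this hook performs, the sketch below distils the opcode choice from the patch into a self-contained C++ snippet; the enum and helper are made up for illustration, and the lowercase mnemonics are assumed from the `QC_MVcc`/`QC_MVccI` instruction names.

```cpp
// Sketch: pick the Xqcicm register-form conditional move, or the immediate
// form with imm=0 when the RHS of the comparison is x0, as expandCCOpToCMov
// does in the patch above.
#include <cstdio>
#include <string>

enum class CondCode { EQ, NE, LT, GE, LTU, GEU };

std::string qcConditionalMove(CondCode CC, bool RhsIsX0) {
  switch (CC) {
  case CondCode::EQ:  return RhsIsX0 ? "qc.mveqi"  : "qc.mveq";
  case CondCode::NE:  return RhsIsX0 ? "qc.mvnei"  : "qc.mvne";
  case CondCode::LT:  return RhsIsX0 ? "qc.mvlti"  : "qc.mvlt";
  case CondCode::GE:  return RhsIsX0 ? "qc.mvgei"  : "qc.mvge";
  case CondCode::LTU: return RhsIsX0 ? "qc.mvltui" : "qc.mvltu";
  case CondCode::GEU: return RhsIsX0 ? "qc.mvgeui" : "qc.mvgeu";
  }
  return "";
}

int main() {
  // $dst = PseudoCCMOVGPR $lhs, $rhs, $cc, $falsev (=$dst), $truev
  //   => qc.mvCC  $dst (=$falsev), $lhs, $rhs, $truev   (register RHS)
  //   => qc.mvCCi $dst (=$falsev), $lhs, 0,    $truev   (RHS == x0)
  std::printf("%s\n", qcConditionalMove(CondCode::GE, /*RhsIsX0=*/true).c_str());
  return 0;
}
```

Because `$dst` is tied to `$falsev`, the conditional move only overwrites the destination with `$truev` when the condition holds, which is why no branch is needed in the expanded sequence.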