-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[RISCV] Add ISel patterns for Qualcomm uC Xqcics extension #146675
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Add CodeGen support for conditional select instructions in this extension Change-Id: I93b8e1210588d66237af7e709b4b6ea15fd837e2
Change-Id: Ifb18259f2ef860e05b4f673aceebf349d7ff8103
Change-Id: I7e1a682ed0b2e20330c6985eb1bd9ac024e9987d
Change-Id: I87527638542a2a7c89b9e782fa825bb398f3e082
@llvm/pr-subscribers-backend-risc-v Author: quic_hchandel (hchandel) Changes: Add CodeGen support for conditional select instructions in this extension. Patch is 29.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146675.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c1212a8b1cf17..fb7d609f4b575 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -437,7 +437,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov() &&
- !Subtarget.hasVendorXqcicm())
+ !Subtarget.hasVendorXqcicm() && !Subtarget.hasVendorXqcics())
setOperationAction(ISD::SELECT, XLenVT, Custom);
if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index e8dd164714875..3d582c38d8277 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1312,6 +1312,30 @@ class QCIMVCCIPat<CondCode Cond, QCIMVCCI Inst>
: Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rs1), simm5:$imm, Cond)), (XLenVT GPRNoX0:$rs3), (XLenVT GPRNoX0:$rd)),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, simm5:$imm, GPRNoX0:$rs3)>;
+class QCISELECTCCIPat<CondCode Cond, QCISELECTCCI Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), (XLenVT GPRNoX0:$rs2), (XLenVT GPRNoX0:$rs3)),
+ (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, GPRNoX0:$rs3)>;
+
+class QCISELECTICCIPat<CondCode Cond, QCISELECTICCI Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), (XLenVT GPRNoX0:$rs2), simm5:$simm2),
+ (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, simm5:$simm2)>;
+
+class QCISELECTICCIPatInv<CondCode Cond, QCISELECTICCI Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), simm5:$imm, Cond)), simm5:$simm2, (XLenVT GPRNoX0:$rs2)),
+ (Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, simm5:$simm2)>;
+
+class QCISELECTICCPat<CondCode Cond, QCISELECTICC Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), (XLenVT GPRNoX0:$rs2), simm5:$simm2),
+ (Inst GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2)>;
+
+class QCISELECTICCPatInv<CondCode Cond, QCISELECTICC Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), simm5:$simm2, (XLenVT GPRNoX0:$rs2)),
+ (Inst GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, simm5:$simm2)>;
+
+class QCISELECTIICCPat<CondCode Cond, QCISELECTIICC Inst>
+ : Pat<(select (XLenVT (setcc (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs1), Cond)), simm5:$simm1, simm5:$simm2),
+ (Inst GPRNoX0:$rd, GPRNoX0:$rs1, simm5:$simm1, simm5:$simm2)>;
+
// Match `riscv_brcc` and lower to the appropriate XQCIBI branch instruction.
class BcciPat<CondCode Cond, QCIBranchInst_rii Inst, DAGOperand InTyImm>
: Pat<(riscv_brcc (XLenVT GPRNoX0:$rs1), InTyImm:$rs2, Cond, bb:$imm12),
@@ -1461,6 +1485,31 @@ def : QCIMVCCIPat <SETLT, QC_MVLTI>;
def : QCIMVCCIPat <SETULT, QC_MVLTUI>;
}
+let Predicates = [HasVendorXqcics, IsRV32] in {
+def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)),
+ (QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs3, GPRNoX0:$rs2)>;
+
+let AddedComplexity = 1 in {
+def : QCISELECTCCIPat <SETEQ, QC_SELECTEQI>;
+def : QCISELECTCCIPat <SETNE, QC_SELECTNEI>;
+}
+
+def : QCISELECTICCIPat <SETEQ, QC_SELECTIEQI>;
+def : QCISELECTICCIPat <SETNE, QC_SELECTINEI>;
+
+def : QCISELECTICCIPatInv <SETEQ, QC_SELECTINEI>;
+def : QCISELECTICCIPatInv <SETNE, QC_SELECTIEQI>;
+
+def : QCISELECTICCPat <SETEQ, QC_SELECTIEQ>;
+def : QCISELECTICCPat <SETNE, QC_SELECTINE>;
+
+def : QCISELECTICCPatInv <SETEQ, QC_SELECTINE>;
+def : QCISELECTICCPatInv <SETNE, QC_SELECTIEQ>;
+
+def : QCISELECTIICCPat <SETEQ, QC_SELECTIIEQ>;
+def : QCISELECTIICCPat <SETNE, QC_SELECTIINE>;
+} // Predicates = [HasVendorXqcics, IsRV32]
+
//===----------------------------------------------------------------------===/i
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/select-cond.ll b/llvm/test/CodeGen/RISCV/select-cond.ll
index d9f9ad379ee95..7ece32681baf9 100644
--- a/llvm/test/CodeGen/RISCV/select-cond.ll
+++ b/llvm/test/CodeGen/RISCV/select-cond.ll
@@ -5,6 +5,8 @@
; RUN: | FileCheck %s --check-prefixes=RV32-THEAD
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32-XQCICM
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32-XQCICS
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
@@ -35,6 +37,12 @@ define signext i32 @select_i32_trunc(i32 signext %cond, i32 signext %x, i32 sign
; RV32-XQCICM-NEXT: mv a0, a1
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_trunc:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: andi a0, a0, 1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a1
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_trunc:
; RV64: # %bb.0:
; RV64-NEXT: andi a3, a0, 1
@@ -80,6 +88,12 @@ define signext i32 @select_i32_param(i1 signext %cond, i32 signext %x, i32 signe
; RV32-XQCICM-NEXT: mv a0, a1
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_param:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: andi a0, a0, 1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a1
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_param:
; RV64: # %bb.0:
; RV64-NEXT: andi a3, a0, 1
@@ -122,6 +136,13 @@ define signext i32 @select_i32_eq(i32 signext %a, i32 signext %b, i32 signext %x
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_eq:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: xor a0, a0, a1
+; RV32-XQCICS-NEXT: seqz a0, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_eq:
; RV64: # %bb.0:
; RV64-NEXT: beq a0, a1, .LBB2_2
@@ -164,6 +185,13 @@ define signext i32 @select_i32_ne(i32 signext %a, i32 signext %b, i32 signext %x
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_ne:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: xor a0, a0, a1
+; RV32-XQCICS-NEXT: snez a0, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_ne:
; RV64: # %bb.0:
; RV64-NEXT: bne a0, a1, .LBB3_2
@@ -206,6 +234,12 @@ define signext i32 @select_i32_ugt(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_ugt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a1, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_ugt:
; RV64: # %bb.0:
; RV64-NEXT: bltu a1, a0, .LBB4_2
@@ -248,6 +282,12 @@ define signext i32 @select_i32_uge(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_uge:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a3
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_uge:
; RV64: # %bb.0:
; RV64-NEXT: bgeu a0, a1, .LBB5_2
@@ -290,6 +330,12 @@ define signext i32 @select_i32_ult(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_ult:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_ult:
; RV64: # %bb.0:
; RV64-NEXT: bltu a0, a1, .LBB6_2
@@ -332,6 +378,12 @@ define signext i32 @select_i32_ule(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_ule:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a1, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a3
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_ule:
; RV64: # %bb.0:
; RV64-NEXT: bgeu a1, a0, .LBB7_2
@@ -374,6 +426,12 @@ define signext i32 @select_i32_sgt(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_sgt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: slt a0, a1, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_sgt:
; RV64: # %bb.0:
; RV64-NEXT: blt a1, a0, .LBB8_2
@@ -416,6 +474,12 @@ define signext i32 @select_i32_sge(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_sge:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: slt a0, a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a3
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_sge:
; RV64: # %bb.0:
; RV64-NEXT: bge a0, a1, .LBB9_2
@@ -458,6 +522,12 @@ define signext i32 @select_i32_slt(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a3
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_slt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: slt a0, a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_slt:
; RV64: # %bb.0:
; RV64-NEXT: blt a0, a1, .LBB10_2
@@ -500,6 +570,12 @@ define signext i32 @select_i32_sle(i32 signext %a, i32 signext %b, i32 signext %
; RV32-XQCICM-NEXT: mv a0, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i32_sle:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: slt a0, a1, a0
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a2, a3
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i32_sle:
; RV64: # %bb.0:
; RV64-NEXT: bge a1, a0, .LBB11_2
@@ -550,6 +626,14 @@ define i64 @select_i64_trunc(i64 %cond, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a0, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_trunc:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: andi a1, a0, 1
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a3
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a2
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_trunc:
; RV64: # %bb.0:
; RV64-NEXT: andi a3, a0, 1
@@ -601,6 +685,15 @@ define i64 @select_i64_param(i1 %cond, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a2
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_param:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: andi a5, a0, 1
+; RV32-XQCICS-NEXT: mv a0, a5
+; RV32-XQCICS-NEXT: qc.selecteqi a5, 0, a4, a2
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a1
+; RV32-XQCICS-NEXT: mv a1, a5
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_param:
; RV64: # %bb.0:
; RV64-NEXT: andi a3, a0, 1
@@ -657,6 +750,16 @@ define i64 @select_i64_eq(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_eq:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: xor a0, a0, a2
+; RV32-XQCICS-NEXT: or a1, a0, a1
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_eq:
; RV64: # %bb.0:
; RV64-NEXT: beq a0, a1, .LBB14_2
@@ -713,6 +816,16 @@ define i64 @select_i64_ne(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_ne:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: xor a0, a0, a2
+; RV32-XQCICS-NEXT: or a1, a0, a1
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selectnei a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selectnei a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_ne:
; RV64: # %bb.0:
; RV64-NEXT: bne a0, a1, .LBB15_2
@@ -774,6 +887,18 @@ define i64 @select_i64_ugt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a5
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_ugt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a2, a0
+; RV32-XQCICS-NEXT: sltu a2, a3, a1
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a6, a4
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a7, a5
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_ugt:
; RV64: # %bb.0:
; RV64-NEXT: bltu a1, a0, .LBB16_2
@@ -835,6 +960,18 @@ define i64 @select_i64_uge(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_uge:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a2
+; RV32-XQCICS-NEXT: sltu a2, a1, a3
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_uge:
; RV64: # %bb.0:
; RV64-NEXT: bgeu a0, a1, .LBB17_2
@@ -896,6 +1033,18 @@ define i64 @select_i64_ult(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a5
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_ult:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a2
+; RV32-XQCICS-NEXT: sltu a2, a1, a3
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a6, a4
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a7, a5
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_ult:
; RV64: # %bb.0:
; RV64-NEXT: bltu a0, a1, .LBB18_2
@@ -957,6 +1106,18 @@ define i64 @select_i64_ule(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_ule:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a2, a0
+; RV32-XQCICS-NEXT: sltu a2, a3, a1
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_ule:
; RV64: # %bb.0:
; RV64-NEXT: bgeu a1, a0, .LBB19_2
@@ -1018,6 +1179,18 @@ define i64 @select_i64_sgt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a5
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_sgt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a2, a0
+; RV32-XQCICS-NEXT: slt a2, a3, a1
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a6, a4
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a7, a5
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_sgt:
; RV64: # %bb.0:
; RV64-NEXT: blt a1, a0, .LBB20_2
@@ -1079,6 +1252,18 @@ define i64 @select_i64_sge(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_sge:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a2
+; RV32-XQCICS-NEXT: slt a2, a1, a3
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_sge:
; RV64: # %bb.0:
; RV64-NEXT: bge a0, a1, .LBB21_2
@@ -1140,6 +1325,18 @@ define i64 @select_i64_slt(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a5
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_slt:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a0, a2
+; RV32-XQCICS-NEXT: slt a2, a1, a3
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a6, a4
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a7, a5
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_slt:
; RV64: # %bb.0:
; RV64-NEXT: blt a0, a1, .LBB22_2
@@ -1201,6 +1398,18 @@ define i64 @select_i64_sle(i64 %a, i64 %b, i64 %x, i64 %y) nounwind {
; RV32-XQCICM-NEXT: mv a1, a7
; RV32-XQCICM-NEXT: ret
;
+; RV32-XQCICS-LABEL: select_i64_sle:
+; RV32-XQCICS: # %bb.0:
+; RV32-XQCICS-NEXT: sltu a0, a2, a0
+; RV32-XQCICS-NEXT: slt a2, a3, a1
+; RV32-XQCICS-NEXT: xor a1, a1, a3
+; RV32-XQCICS-NEXT: seqz a1, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a2, a0
+; RV32-XQCICS-NEXT: mv a0, a1
+; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a4, a6
+; RV32-XQCICS-NEXT: qc.selecteqi a1, 0, a5, a7
+; RV32-XQCICS-NEXT: ret
+;
; RV64-LABEL: select_i64_sle:
; RV64: # %bb.0:
; RV64-NEXT: bge a1, a0, .LBB23_2
diff --git a/llvm/test/CodeGen/RISCV/xqcics.ll b/llvm/test/CodeGen/RISCV/xqcics.ll
new file mode 100644
index 0000000000000..c0c07a9329179
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xqcics.ll
@@ -0,0 +1,381 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; Test that we are able to generate the Xqcics instructions
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=RV32IXQCICS
+
+define i32 @select_cc_example_eq(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_eq:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 11
+; RV32I-NEXT: beq a0, a1, .LBB0_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: .LBB0_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_eq:
+; RV32IXQCICS: # %bb.0: # %entry
+; RV32IXQCICS-NEXT: qc.selecteqi a0, 11, a2, a3
+; RV32IXQCICS-NEXT: ret
+entry:
+ %cmp = icmp eq i32 %a, 11
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+}
+
+define i32 @select_cc_example_eq_c(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_eq_c:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 11
+; RV32I-NEXT: beq a0, a1, .LBB1_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: .LBB1_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_eq_c:
+; RV32IXQCICS: # %bb.0: # %entry
+; RV32IXQCICS-NEXT: qc.selecteqi a0, 11, a2, a3
+; RV32IXQCICS-NEXT: ret
+entry:
+ %cmp = icmp eq i32 11, %a
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+}
+
+define i32 @select_cc_example_ne(i32 %a, i32 %b, i32 %x, i32 %y) {
+; RV32I-LABEL: select_cc_example_ne:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: li a1, 11
+; RV32I-NEXT: bne a0, a1, .LBB2_2
+; RV32I-NEXT: # %bb.1: # %entry
+; RV32I-NEXT: mv a2, a3
+; RV32I-NEXT: .LBB2_2: # %entry
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV32IXQCICS-LABEL: select_cc_example_ne:
+; RV32IXQCICS: # %bb.0: # %entry
+; RV32IXQCICS-NEXT: qc.selectnei a0, 11, a2,...
[truncated]
|
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)), | ||
(QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs3, GPRNoX0:$rs2)>; | ||
|
||
let AddedComplexity = 1 in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a comment as to why the AddedComplexity is needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we qualify the conflicting patterns with NoVendorXqcics instead? AddedComplexity is a bit of a hack. Especially when there are other similar patterns with register and immediate arguments. Increasing the complexity of these patterns brings it closer to the complexity score of QC_SELECTIEQI.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried this locally, but it disables the Xqcicm patterns when both experimental-xqcicm and experimental-xqcics are enabled, even in cases where they should be used. I think enabling both of these extensions together is a likely scenario in the future.
; RV32-XQCICS-NEXT: xor a0, a0, a1 | ||
; RV32-XQCICS-NEXT: seqz a0, a0 | ||
; RV32-XQCICS-NEXT: qc.selecteqi a0, 0, a3, a2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This sequence is:
- xoring a0 and a1,
- setting a0 to 1 if the xor result is zero, or to 0 if it is non-zero (an invert),
- then checking whether the value in a0 is 0 or not.
I think this could be done with just the xor and a qc.selectnei. The same sort-of applies to select_i32_ne, below.
This makes me wonder if we should be using riscv_seteq and riscv_setne - the reason we didn't for qc.mv<cc> was because we had other comparisons to cover, but with qc.select* the only comparisons are eq/ne.
I'm not 100% sure what the right thing to do here is (because the riscv_seteq/ne don't quite seem to match with the patterns in this PR), so the current implementation may be good enough for the moment, and we can come back in the future to see if riscv_seteq/ne are helpful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can come back to this in a follow-up.
Change-Id: I79661099b5d5aae89a047279f47463f120cfbc2e
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a RUN line with both Xqcicm and Xqcics to show that we prefer Xqcics?
Change-Id: I3da7ecb9e89b6377d9689678ba39accfeb06a3fa
Thanks @svs-quic. Added the RUN line in xqcics.ll. The output produced with both extensions enabled is the same as with Xqcics alone, so I reused those CHECK prefixes for this RUN line. |
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)), | ||
(QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs3, GPRNoX0:$rs2)>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Output pattern operand names should match the final instruction.
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)), | |
(QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs3, GPRNoX0:$rs2)>; | |
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs3), (XLenVT GPRNoX0:$rs2)), | |
(QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, GPRNoX0:$rs3)>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be better to use QC_SELECTNEI so the operands don't have to be swapped?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
let Predicates = [HasVendorXqcics, IsRV32] in { | ||
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2),(XLenVT GPRNoX0:$rs3)), | ||
(QC_SELECTEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs3, GPRNoX0:$rs2)>; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should there also be patterns like these for when one of the true/false values is an immediate, but there is no setcc?
def : Pat<(select (XLenVT GPRNoX0:$rd), (XLenVT GPRNoX0:$rs2), simm5:$simm2),
(QC_SELECTINEI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, simm5:$simm2)>;
def : Pat<(select (XLenVT GPRNoX0:$rd), simm5:$simm2, (XLenVT GPRNoX0:$rs2)),
(QC_SELECTIEQI GPRNoX0:$rd, (XLenVT 0), GPRNoX0:$rs2, simm5:$simm2)>;
Unfortunately, qc.selectiine can't have a rs1=x0 so you can't write a pattern for true and false both being immediates, but no setcc present.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They would likely help, yes.
Unfortunately, qc.selectiine can't have a rs1=x0 so you can't write a pattern for true and false both being immediates, but no setcc present.
But this is "fine" right, in that selection will end up putting one immediate into a register, and keeping the other as an immediate - rather than failing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But this is "fine" right, in that selection will end up putting one immediate into a register, and keeping the other as an immediate - rather than failing.
Correct.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added both of these patterns. Also added test cases for when both the true and false values of the select are immediates.
Change-Id: I7a7abd76124b0d3a6ab744c4c5c80b007ac1d2f9
Add CodeGen support for conditional select instructions in this extension