-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][GISel] Select G_SELECT (G_ICMP, A, B) #68247
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-globalisel

Changes: If MI is a G_SELECT(G_ICMP(tst, A, B), C, D) then we can use (A, B, tst) as the (LHS, RHS, CC) of the Select_GPR_Using_CC_GPR.

Patch is 20.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68247.diff

4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f97a0d84f686f9..93fdba869257d83 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -44,11 +44,17 @@ class RISCVInstructionSelector : public InstructionSelector {
const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) const;
+ // tblgen-erated 'select' implementation, used as the initial selector for
+ // the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
+ // Custom selection methods
bool selectCopy(MachineInstr &MI, MachineRegisterInfo &MRI) const;
bool selectConstant(MachineInstr &MI, MachineIRBuilder &MIB,
MachineRegisterInfo &MRI) const;
bool selectSExtInreg(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
bool earlySelectShift(unsigned Opc, MachineInstr &I, MachineIRBuilder &MIB,
const MachineRegisterInfo &MRI);
@@ -59,6 +65,11 @@ class RISCVInstructionSelector : public InstructionSelector {
void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ /// Returns a G_ICMP that is equivalent to MI, whose condition code matches
+ /// one of the comparisons supported directly by branches in the RISC-V ISA.
+ MachineInstr *createICMPForBranch(MachineInstr *MI, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
const RISCVSubtarget &STI;
const RISCVInstrInfo &TII;
const RISCVRegisterInfo &TRI;
@@ -239,6 +250,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
case TargetOpcode::G_SEXT_INREG:
return selectSExtInreg(MI, MIB);
+ case TargetOpcode::G_SELECT:
+ return selectSelect(MI, MIB, MRI);
default:
return false;
}
@@ -376,6 +389,124 @@ bool RISCVInstructionSelector::selectSExtInreg(MachineInstr &MI,
return true;
}
+/// Maps the integer-compare predicate CC onto the matching RISCVCC::CondCode.
+/// Only ICMP_EQ, NE, ULT, SLT, UGE and SGE are handled; anything else is unreachable.
+static RISCVCC::CondCode getRISCVCCFromICMP(CmpInst::Predicate CC) {
+ switch (CC) {
+ default: // also reached for ICMP_SGT/SLE/UGT/ULE, which have no case below
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ return RISCVCC::COND_EQ;
+ case CmpInst::Predicate::ICMP_NE:
+ return RISCVCC::COND_NE;
+ case CmpInst::Predicate::ICMP_ULT:
+ return RISCVCC::COND_LTU;
+ case CmpInst::Predicate::ICMP_SLT:
+ return RISCVCC::COND_LT;
+ case CmpInst::Predicate::ICMP_UGE:
+ return RISCVCC::COND_GEU;
+ case CmpInst::Predicate::ICMP_SGE:
+ return RISCVCC::COND_GE;
+ }
+}
+
+MachineInstr *RISCVInstructionSelector::createICMPForBranch(
+ MachineInstr *MI, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) const {
+ assert(MI->getOpcode() == TargetOpcode::G_ICMP); // MI must be a G_ICMP
+ CmpInst::Predicate CC =
+ static_cast<CmpInst::Predicate>(MI->getOperand(1).getPredicate());
+ MachineOperand &LHS = MI->getOperand(2);
+ MachineOperand &RHS = MI->getOperand(3);
+
+ // If RHS is defined directly by a G_CONSTANT, try to rewrite into a compare against 0.
+ MachineInstr *MaybeConstant = MRI.getVRegDef(RHS.getReg());
+ if (MaybeConstant && MaybeConstant->getOpcode() == TargetOpcode::G_CONSTANT) {
+ switch (CC) {
+ case CmpInst::Predicate::ICMP_SGT:
+ // Convert X > -1 to X >= 0.
+ if (MaybeConstant->getOperand(1).getCImm()->getSExtValue() == -1) {
+ MachineInstr *Zero = MIB.buildConstant(
+ MRI.getType(MaybeConstant->getOperand(0).getReg()), 0);
+ selectConstant(*Zero, MIB, MRI); // NOTE(review): the bool result is ignored here
+ return MIB.buildICmp(CmpInst::Predicate::ICMP_SGE, MI->getOperand(0),
+ LHS, Zero->getOperand(0));
+ }
+ break;
+ case CmpInst::Predicate::ICMP_SLT:
+ // Convert X < 1 to 0 >= X (the zero becomes the LHS of the new compare).
+ if (MaybeConstant->getOperand(1).getCImm()->getSExtValue() == 1) {
+ MachineInstr *Zero= MIB.buildConstant(
+ MRI.getType(MaybeConstant->getOperand(0).getReg()), 0);
+ selectConstant(*Zero, MIB, MRI); // NOTE(review): the bool result is ignored here
+ return MIB.buildICmp(CmpInst::Predicate::ICMP_SGE, MI->getOperand(0),
+ Zero->getOperand(0), LHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Expected ICMP CmpInst::Predicate.");
+ case CmpInst::Predicate::ICMP_EQ:
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_ULT:
+ case CmpInst::Predicate::ICMP_SLT:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_SGE:
+ // These CCs are supported directly by RISC-V branches, so MI is returned as is.
+ return MI;
+ case CmpInst::Predicate::ICMP_SGT:
+ case CmpInst::Predicate::ICMP_SLE:
+ case CmpInst::Predicate::ICMP_UGT:
+ case CmpInst::Predicate::ICMP_ULE:
+ // These CCs are not supported directly by RISC-V branches, but the swapped
+ // predicate with LHS and RHS exchanged is, so build that compare instead.
+ return MIB.buildICmp(CmpInst::getSwappedPredicate(CC), MI->getOperand(0), // NOTE(review): reuses MI's result operand
+ RHS, LHS);
+ }
+}
+
+bool RISCVInstructionSelector::selectSelect(MachineInstr &MI,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT); // MI must be a G_SELECT
+ MachineInstr *Result; // assigned in exactly one of the two branches below
+ MachineInstr *MaybeICMP = MRI.getVRegDef(MI.getOperand(1).getReg());
+ if (MaybeICMP && MaybeICMP->getOpcode() == TargetOpcode::G_ICMP) {
+ // If MI is a G_SELECT(G_ICMP(tst, A, B), C, D), then (A, B, tst) can be used
+ // directly as the (LHS, RHS, CC) of the Select_GPR_Using_CC_GPR.
+ MachineInstr *ICMPForBranch = createICMPForBranch(MaybeICMP, MIB, MRI);
+ CmpInst::Predicate CC = static_cast<CmpInst::Predicate>(
+ ICMPForBranch->getOperand(1).getPredicate());
+ Result = MIB.buildInstr(RISCV::Select_GPR_Using_CC_GPR)
+ .addDef(MI.getOperand(0).getReg());
+ Result->addOperand(ICMPForBranch->getOperand(2)); // compare LHS
+ Result->addOperand(ICMPForBranch->getOperand(3)); // compare RHS
+ Result->addOperand(
+ MachineOperand::CreateImm(getRISCVCCFromICMP(CC)));
+ Result->addOperand(MI.getOperand(2)); // second G_SELECT operand
+ Result->addOperand(MI.getOperand(3)); // third G_SELECT operand
+
+ // Delete ICMPForBranch when it was newly created, since we know it has no
+ // users. The original G_ICMP is left to be selected normally in case it has other users.
+ if (ICMPForBranch != MaybeICMP)
+ ICMPForBranch->eraseFromParent();
+ } else {
+ Result = MIB.buildInstr(RISCV::Select_GPR_Using_CC_GPR)
+ .addDef(MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addReg(RISCV::X0) // no G_ICMP to fold: compare the condition register with X0 ...
+ .addImm(RISCVCC::COND_NE) // ... using COND_NE
+ .addReg(MI.getOperand(2).getReg())
+ .addReg(MI.getOperand(3).getReg());
+ }
+ MI.eraseFromParent(); // the G_SELECT has been replaced by Result
+ return constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
+}
+
namespace llvm {
InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &TM,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 413af1ff4b9439a..bf331ca0bc6d997 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2128,7 +2128,7 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
break;
case ISD::SETLT:
- // Convert X < 1 to 0 <= X.
+ // Convert X < 1 to 0 >= X.
if (C == 1) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv32.mir
new file mode 100644
index 000000000000000..b9bd9b980e2e475
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv32.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -run-pass=instruction-select --simplify-mir \
+# RUN: -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: select_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: select_s32
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY]], $x0, 1, [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = COPY $x12
+ %3:gprb(s32) = G_SELECT %0, %1, %2
+ $x10 = COPY %3(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: select_p0
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY]], $x0, 1, [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(p0) = COPY $x11
+ %2:gprb(p0) = COPY $x12
+ %3:gprb(p0) = G_SELECT %0, %1, %2
+ $x10 = COPY %3(p0)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_ult
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_ult
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x13
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY2]], [[COPY3]], 4, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = COPY $x12
+ %3:gprb(s32) = COPY $x13
+ %4:gprb(s32) = COPY $x14
+ %5:gprb(s32) = G_ICMP intpred(ult), %2, %3
+ %6:gprb(s32) = G_SELECT %5, %0, %1
+ $x10 = COPY %6(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_ugt
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_ugt
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x13
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY3]], [[COPY2]], 4, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = COPY $x12
+ %3:gprb(s32) = COPY $x13
+ %4:gprb(s32) = COPY $x14
+ %5:gprb(s32) = G_ICMP intpred(ugt), %2, %3
+ %6:gprb(s32) = G_SELECT %5, %0, %1
+ $x10 = COPY %6(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_sgtneg1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_sgtneg1
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY2]], [[COPY3]], 3, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = COPY $x12
+ %3:gprb(s32) = COPY $x13
+ %4:gprb(s32) = COPY $x14
+ %5:gprb(s32) = G_CONSTANT i32 -1
+ %6:gprb(s32) = G_ICMP intpred(sgt), %2, %5
+ %7:gprb(s32) = G_SELECT %6, %0, %1
+ $x10 = COPY %7(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_slt1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_slt1
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY3]], [[COPY2]], 3, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s32) = COPY $x10
+ %1:gprb(s32) = COPY $x11
+ %2:gprb(s32) = COPY $x12
+ %3:gprb(s32) = COPY $x13
+ %4:gprb(s32) = COPY $x14
+ %5:gprb(s32) = G_CONSTANT i32 1
+ %6:gprb(s32) = G_ICMP intpred(slt), %2, %5
+ %7:gprb(s32) = G_SELECT %6, %0, %1
+ $x10 = COPY %7(s32)
+ PseudoRET implicit $x10
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv64.mir
new file mode 100644
index 000000000000000..6eee273d320be94
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/select-rv64.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -run-pass=instruction-select %s -o - \
+# RUN: | FileCheck %s
+---
+name: select_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: select_s64
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY]], $x0, 1, [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = COPY $x12
+ %3:gprb(s64) = G_SELECT %0, %1, %2
+ $x10 = COPY %3(s64)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_p0
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12
+
+ ; CHECK-LABEL: name: select_p0
+ ; CHECK: liveins: $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY]], $x0, 1, [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(p0) = COPY $x11
+ %2:gprb(p0) = COPY $x12
+ %3:gprb(p0) = G_SELECT %0, %1, %2
+ $x10 = COPY %3(p0)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_ult
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_ult
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x13
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY2]], [[COPY3]], 4, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = COPY $x12
+ %3:gprb(s64) = COPY $x13
+ %4:gprb(s64) = COPY $x14
+ %5:gprb(s64) = G_ICMP intpred(ult), %2, %3
+ %6:gprb(s64) = G_SELECT %5, %0, %1
+ $x10 = COPY %6(s64)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_ugt
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_ugt
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x13
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY3]], [[COPY2]], 4, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = COPY $x12
+ %3:gprb(s64) = COPY $x13
+ %4:gprb(s64) = COPY $x14
+ %5:gprb(s64) = G_ICMP intpred(ugt), %2, %3
+ %6:gprb(s64) = G_SELECT %5, %0, %1
+ $x10 = COPY %6(s64)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_sgtneg1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_sgtneg1
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY2]], [[COPY3]], 3, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
+ %2:gprb(s64) = COPY $x12
+ %3:gprb(s64) = COPY $x13
+ %4:gprb(s64) = COPY $x14
+ %5:gprb(s64) = G_CONSTANT i64 -1
+ %6:gprb(s64) = G_ICMP intpred(sgt), %2, %5
+ %7:gprb(s64) = G_SELECT %6, %0, %1
+ $x10 = COPY %7(s64)
+ PseudoRET implicit $x10
+
+...
+---
+name: select_icmp_slt1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x10, $x11, $x12, $x13, $x14
+
+ ; CHECK-LABEL: name: select_icmp_slt1
+ ; CHECK: liveins: $x10, $x11, $x12, $x13, $x14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[Select_GPR_Using_CC_GPR:%[0-9]+]]:gpr = Select_GPR_Using_CC_GPR [[COPY3]], [[COPY2]], 3, [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[Select_GPR_Using_CC_GPR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprb(s64) = COPY $x10
+ %1:gprb(s64) = COPY $x11
...
[truncated]
|
9675d25
to
321f3e8
Compare
This patch is stacked on #67614 |
✅ With the latest revision this PR passed the C/C++ code formatter. |
3f25515
to
d576073
Compare
|
||
// Adjust comparisons to use comparison with 0 if possible. | ||
MachineInstr *MaybeConstant = MRI.getVRegDef(RHS); | ||
if (MaybeConstant && MaybeConstant->getOpcode() == TargetOpcode::G_CONSTANT) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should be using getIConstantVRegValWithLookThrough? Or getIConstantVRegVal. Not sure if we need the look-through.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used matchConstant, which uses the utility function that returns the sign-extended constant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getIConstantVRegValWithLookThrough gives you more of the global aspect. The constant could sit in another block, and the hit rate is higher. MRI.getVRegDef(RHS) could be a COPY.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See
static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the approach @tschuett suggested may be better than the m_ICst that this patch is currently using, since it's taking advantage of the look-through. I have updated this patch to use getIConstantVRegSExtVal, which calls getIConstantVRegValWithLookThrough but returns the sign-extended constant, so that we look through COPYs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The C++ code for the G_ICMP optimization with look-through ought to be better than the DAG code. I have the corresponding AArch64 file. It seems to be the import of the ISD::** for AArch64.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see why we should use look through here if we wouldn't have gotten look through using a tblgen pattern.
Why's that? Wouldn't we prefer an implementation that is more capable of optimizing?
We're still in the bring-up phase, focusing on -O0 and not necessarily on optimization. It feels premature to me to make a G_ICMP used by a select capable of being optimized differently than a G_ICMP by itself. Look-through isn't free: it would have a compile-time cost which, though probably small, is somewhat counter to -O0. With optimizations enabled, shouldn't constant-folding combines have already folded the constant chain? Are we talking about a real optimization opportunity or just something hypothetical? If there's a real missed opportunity, then it feels like improving constant folding would give more improvement, so that the imported tblgen patterns get the benefit, since there are many more of those.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did we come to agreement on whether to use look through or not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should commit the patch as is. I anticipate more refining of how we handle compares and selects/branches in the future to do optimizations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will commit as is; refinements can be made in the future.
RHS = MI.getOperand(3).getReg(); | ||
|
||
// Adjust comparisons to use comparison with 0 if possible. | ||
if (auto Constant = matchConstant<int64_t>(RHS, MRI)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think matchConstant is an implementation detail of m_ICst for the match function. I think getIConstantVRegVal is the correct underlying function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to use m_ICst.
This approach works, but we could probably get the same functionality by importing the required SelectionDAG patterns. Patterns like this:

[code snippet not preserved in this extract]

fail to import since we don't have an equivalent instruction for [name not preserved in this extract]. It would be good because then we don't reinvent this wheel, but it also means we're committing to having -O3 selection for G_SELECT at all optimization levels (since SelectionDAG behaves invariantly on optimization levels). We briefly discussed in the past whether any combine that depends on a previous instruction should be at -O1, but we didn't commit to any decision back then. I like the approach you've chosen because it lets us be more granular about our optimization levels. I just thought I'd drop this here so that we're all aware of the design choices here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There aren't SelectionDAG patterns for everything. The majority of the code in this patch is the equivalent of LowerSelect and translateSetCCForBranch from SelectionDAG. So adding an equivalent of |
Don't create new G_ICMP
simplify cond code and fix returns
35cfe9c
to
e6fbfc9
Compare
If MI is a G_SELECT(G_ICMP(tst, A, B), C, D) then we can use (A, B, tst) as the (LHS, RHS, CC) of the Select_GPR_Using_CC_GPR.