-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][GISel] Add really basic support for FP regbank selection for G_LOAD/G_STORE. #70896
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-globalisel Author: Craig Topper (topperc) ChangesCoerce the register bank based on the users of the G_LOAD or the defining instruction for the G_STORE. No tests for RV32 because we need to make s64 load/store legal for rv32 with D extension first. Full diff: https://github.com/llvm/llvm-project/pull/70896.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index b956cd4782b7943..7baafe2b177663c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -868,6 +868,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Check if that store is fed by fp instructions.
if (OpRegBankIdx[0] == PMI_FirstGPR) {
Register VReg = MI.getOperand(0).getReg();
+ assert(VReg);
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index f005948d2094445..aa59dff5266b3ea 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -103,6 +103,51 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
}
}
+// TODO: Make this more like AArch64?
+static bool onlyUsesFP(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCMP:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+// TODO: Make this more like AArch64?
+static bool onlyDefinesFP(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
const RegisterBankInfo::InstructionMapping &
RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
@@ -149,11 +194,44 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
- case TargetOpcode::G_STORE:
break;
+ case TargetOpcode::G_LOAD:
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blind map every scalar to GPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ return onlyUsesFP(UseMI);
+ })) {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.getSizeInBits() == 64)
+ OperandsMapping = getOperandsMapping(
+ {&RISCV::ValueMappings[RISCV::FPR64Idx], GPRValueMapping});
+ else
+ OperandsMapping = getOperandsMapping(
+ {&RISCV::ValueMappings[RISCV::FPR32Idx], GPRValueMapping});
+ }
+
+ break;
+ case TargetOpcode::G_STORE: {
+ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(0).getReg());
+ if (onlyDefinesFP(*DefMI)) {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.getSizeInBits() == 64)
+ OperandsMapping = getOperandsMapping(
+ {&RISCV::ValueMappings[RISCV::FPR64Idx], GPRValueMapping});
+ else
+ OperandsMapping = getOperandsMapping(
+ {&RISCV::ValueMappings[RISCV::FPR32Idx], GPRValueMapping});
+ }
+ break;
+ }
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FRAME_INDEX:
case TargetOpcode::G_GLOBAL_VALUE:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-use-def.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-use-def.mir
new file mode 100644
index 000000000000000..a741d3345be19d3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-use-def.mir
@@ -0,0 +1,105 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=regbankselect \
+# RUN: -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck %s
+
+---
+name: fp_store_f32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10, $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fp_store_f32
+ ; CHECK: liveins: $x10, $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s32) = COPY $f11_f
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: G_STORE [[FADD]](s32), [[COPY]](p0) :: (store (s32))
+ ; CHECK-NEXT: PseudoRET
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = COPY $f10_f
+ %2:_(s32) = COPY $f11_f
+ %3:_(s32) = G_FADD %1, %2
+ G_STORE %3(s32), %0(p0) :: (store (s32))
+ PseudoRET
+
+...
+---
+name: fp_store_f64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10, $f10_d, $f11_d
+
+ ; CHECK-LABEL: name: fp_store_f64
+ ; CHECK: liveins: $x10, $f10_d, $f11_d
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s64) = COPY $f11_d
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[COPY1]], [[COPY2]]
+ ; CHECK-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64))
+ ; CHECK-NEXT: PseudoRET
+ %0:_(p0) = COPY $x10
+ %1:_(s64) = COPY $f10_d
+ %2:_(s64) = COPY $f11_d
+ %3:_(s64) = G_FADD %1, %2
+ G_STORE %3(s64), %0(p0) :: (store (s64))
+ PseudoRET
+
+...
+---
+name: fp_load_f32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10, $f10_f
+
+ ; CHECK-LABEL: name: fp_load_f32
+ ; CHECK: liveins: $x10, $f10_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f10_f
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s32) = G_FADD [[LOAD]], [[COPY1]]
+ ; CHECK-NEXT: $f10_f = COPY [[FADD]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $f10_f
+ %0:_(p0) = COPY $x10
+ %1:_(s32) = COPY $f10_f
+ %2:_(s32) = G_LOAD %0(p0) :: (load (s32))
+ %3:_(s32) = G_FADD %2, %1
+ $f10_f = COPY %3(s32)
+ PseudoRET implicit $f10_f
+
+...
+---
+name: fp_load_f64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10, $f10_d
+
+ ; CHECK-LABEL: name: fp_load_f64
+ ; CHECK: liveins: $x10, $f10_d
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+ ; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s64) = G_FADD [[LOAD]], [[COPY1]]
+ ; CHECK-NEXT: $f10_d = COPY [[FADD]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $f10_d
+ %0:_(p0) = COPY $x10
+ %1:_(s64) = COPY $f10_d
+ %2:_(s64) = G_LOAD %0(p0) :: (load (s64))
+ %3:_(s64) = G_FADD %2, %1
+ $f10_d = COPY %3(s64)
+ PseudoRET implicit $f10_d
+
+...
|
93028d3
to
b734199
Compare
@@ -869,6 +869,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { | |||
// Check if that store is fed by fp instructions. | |||
if (OpRegBankIdx[0] == PMI_FirstGPR) { | |||
Register VReg = MI.getOperand(0).getReg(); | |||
assert(VReg); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops I didn't mean to leave that in. I was trying to understand why AArch64 considered the possibility of the operand not being a register. But that assert doesn't fail in any lit test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it weren't a register, getReg would have asserted anyway. The verifier should be rejecting NoRegister register operands for G_* instructions too (plus we have very few G_* instructions that have immediate operands)
592ad4c
to
bfe2ec6
Compare
break; | ||
case TargetOpcode::G_LOAD: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getType
can return LLT{}
. Should we fail if !isValid()
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the MIR be ill-formed if the register didn't have a type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLT is not valid if the G_LOAD/G_STORE is on a physical register. I thought it might be well-formed to write:
---
name: foo
legalized: true
tracksRegLiveness: true
body: |
bb.1:
liveins: $x10
%0:_(s32) = G_LOAD $x10(p0) :: (load (s32))
G_STORE %0(s32), $x10(p0) :: (store (s32))
PseudoRET
...
but this seems to fail sooner:
static unsigned int llvm::Register::virtReg2Index(llvm::Register): Assertion `Reg.isVirtual() && "Not a virtual register"' failed.
Looks like we don't need an isValid check.
break; | ||
} | ||
case TargetOpcode::G_STORE: { | ||
LLT Ty = MRI.getType(MI.getOperand(0).getReg()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getType can return LLT{}. Should we fail if !isValid()?
llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-load-store.mir
Outdated
Show resolved
Hide resolved
; RV64-NEXT: {{ $}} | ||
; RV64-NEXT: [[COPY:%[0-9]+]]:gprb(p0) = COPY $x10 | ||
; RV64-NEXT: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $f10_d | ||
; RV64-NEXT: [[COPY2:%[0-9]+]]:gprb(s64) = COPY [[COPY1]](s64) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like there is an extra copy here. Is that needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first copy is created by call lowering, the second copy is created by regbank selection because this patch doesn't see the copy to know it should be an FP store. I'll fix that in another patch.
// Use FPR64 for s64 stores on rv32. | ||
if (GPRSize == 32 && Ty.getSizeInBits() == 64) { | ||
OperandsMapping = | ||
getOperandsMapping({getFPValueMapping(64), GPRValueMapping}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On rv32, are you guaranteed to have FPR64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No that's not a guarantee architecturally. But if s64 got through the legalizer then it must be supported.
…G_LOAD/G_STORE. Coerce the register bank based on the users of the G_LOAD or the defining instruction for the G_STORE. No tests for RV32 because we need to make s64 load/store legal for rv32 with D extension first.
bfe2ec6
to
741b43c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…G_LOAD/G_STORE. (llvm#70896) Coerce the register bank based on the users of the G_LOAD or the defining instruction for the G_STORE. s64 on rv32 is handled by forcing the FPRB register bank.
Coerce the register bank based on the users of the G_LOAD or the defining instruction for the G_STORE.
s64 on rv32 is handled by forcing the FPR register bank.