-
Notifications
You must be signed in to change notification settings - Fork 10.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Fix stack size computation when M extension disabled #78602
Conversation
Ensure that getVLENFactoredAmount does not fail when the scale amount requires the use of a non-trivial multiplication but the M extension is not enabled. In such case, perform the multiplication using shifts and adds.
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. If you have received no comments on your PR for a week, you can request a review by adding a "ping" comment. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-backend-risc-v Author: Simeon K (simeonkr). Changes: Ensure that getVLENFactoredAmount does not fail when the scale amount requires the use of a non-trivial multiplication but the M extension is not enabled. In such case, perform the multiplication using shifts and adds. Full diff: https://github.com/llvm/llvm-project/pull/78602.diff — 2 files affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 857e8979762cdc6..c3ebf6ce83e3f14 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3159,18 +3159,39 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(ScaledRegister, RegState::Kill)
.addReg(DestReg, RegState::Kill)
.setMIFlag(Flag);
- } else {
+ } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
movImm(MBB, II, DL, N, NumOfVReg, Flag);
- if (!STI.hasStdExtM() && !STI.hasStdExtZmmul())
- MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
- MF.getFunction(),
- "M- or Zmmul-extension must be enabled to calculate the vscaled size/"
- "offset."});
BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
+ } else {
+ Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
+ .addReg(RISCV::X0)
+ .addImm(0)
+ .setMIFlag(Flag);
+ uint32_t PrevShiftAmount = 0;
+ for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
+ if (NumOfVReg & (1 << ShiftAmount)) {
+ if (ShiftAmount)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(ShiftAmount - PrevShiftAmount)
+ .setMIFlag(Flag);
+ if (NumOfVReg >> (ShiftAmount + 1))
+ BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
+ .addReg(Acc, RegState::Kill)
+ .addReg(DestReg)
+ .setMIFlag(Flag);
+ PrevShiftAmount = ShiftAmount;
+ }
+ }
+ BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addReg(Acc)
+ .setMIFlag(Flag);
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index e6adb725110a401..78bec6c68c3f6e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
define void @lmul1() nounwind {
; CHECK-LABEL: lmul1:
@@ -243,6 +245,26 @@ define void @lmul4_and_2_x2_1() nounwind {
; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; ZBA-NEXT: addi sp, sp, 48
; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul4_and_2_x2_1:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -48
+; NOMUL-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: addi s0, sp, 48
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: li a1, 0
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: andi sp, sp, -32
+; NOMUL-NEXT: addi sp, s0, -48
+; NOMUL-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: addi sp, sp, 48
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v3 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -425,6 +447,26 @@ define void @lmul_8_x5() nounwind {
; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZBA-NEXT: addi sp, sp, 80
; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul_8_x5:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -80
+; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: addi s0, sp, 80
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: li a1, 0
+; NOMUL-NEXT: slli a0, a0, 3
+; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: andi sp, sp, -64
+; NOMUL-NEXT: addi sp, s0, -80
+; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: addi sp, sp, 80
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
@@ -467,6 +509,26 @@ define void @lmul_8_x9() nounwind {
; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; ZBA-NEXT: addi sp, sp, 80
; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul_8_x9:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -80
+; NOMUL-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; NOMUL-NEXT: addi s0, sp, 80
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: li a1, 0
+; NOMUL-NEXT: slli a0, a0, 3
+; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: slli a0, a0, 3
+; NOMUL-NEXT: add a0, a0, a1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: andi sp, sp, -64
+; NOMUL-NEXT: addi sp, s0, -80
+; NOMUL-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; NOMUL-NEXT: addi sp, sp, 80
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
|
@simeonkr do you have a core where this is needed? Although the RISC-V ISA allows vector extensions without M, I'm wondering if that case is worth supporting in LLVM. |
@asb I surmise there may be certain embedded RV32I cores out there that might require it, but we don't have any on our end. However, the code in question was the only place I found where we emit a mul instruction, so I don't anticipate much effort beyond this patch to support this configuration. The original motivation for this came from the fact that certain tests are defined without the M extension enabled in their target attributes. |
Sounds good. If we reasonably expect this is the only place that needs fixing then it seems perfectly sensible to support. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@simeonkr , @asb Permit a small info addition here:
|
This patch is specifically for Zve* or V without M. rv32i should already support m-less code emission using library calls for multiply. |
Sorry, and thanks for this clarification. V without M is really unusual (but indeed valid) — now I see the very purpose of this change. Thanks again. |
Ensure that getVLENFactoredAmount does not fail when the scale amount requires the use of a non-trivial multiplication but the M extension is not enabled. In such case, perform the multiplication using shifts and adds.