-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use shNadd for scalable stack offsets #88062
[RISCV] Use shNadd for scalable stack offsets #88062
Conversation
If we need to multiply VLENB by 2, 4, or 8 and add it to the stack pointer, we can do so with a shNadd instead of separate shift and add instructions.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) Changes: If we need to multiply VLENB by 2, 4, or 8 and add it to the stack pointer, we can do so with a shNadd instead of separate shift and add instructions. Full diff: https://github.com/llvm/llvm-project/pull/88062.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 713260b090e9cf..ce8fdf9384b8e2 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -204,10 +204,21 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
uint32_t NumOfVReg = ScalableValue / 8;
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
.setMIFlag(Flag);
- TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
- BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
- .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
- .setMIFlag(Flag);
+
+ if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
+ (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
+ unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
+ (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
+ BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(SrcReg, getKillRegState(KillSrcReg))
+ .setMIFlag(Flag);
+ } else {
+ TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
+ BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
+ .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
+ .setMIFlag(Flag);
+ }
SrcReg = DestReg;
KillSrcReg = true;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 466ab085b266b4..90794820ddd849 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -7,29 +7,67 @@
; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
define void @lmul1() nounwind {
-; CHECK-LABEL: lmul1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 1
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add sp, a0, sp
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul1:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: ret
%v = alloca <vscale x 1 x i64>
ret void
}
define void @lmul2() nounwind {
-; CHECK-LABEL: lmul2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 1
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 1
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add sp, a0, sp
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul2:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 1
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: ret
%v = alloca <vscale x 2 x i64>
ret void
}
@@ -75,15 +113,34 @@ define void @lmul8() nounwind {
}
define void @lmul1_and_2() nounwind {
-; CHECK-LABEL: lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add sp, a0, sp
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul1_and_2:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
ret void
@@ -132,15 +189,34 @@ define void @lmul1_and_4() nounwind {
}
define void @lmul2_and_1() nounwind {
-; CHECK-LABEL: lmul2_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul2_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul2_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add sp, a0, sp
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: lmul2_and_1:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 2 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
@@ -273,19 +349,46 @@ define void @lmul4_and_2_x2_1() nounwind {
define void @gpr_and_lmul1_and_2() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; NOZBA-LABEL: gpr_and_lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -16
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: li a0, 3
+; NOZBA-NEXT: sd a0, 8(sp)
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: addi sp, sp, 16
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -16
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: li a0, 3
+; ZBA-NEXT: sd a0, 8(sp)
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add sp, a0, sp
+; ZBA-NEXT: addi sp, sp, 16
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: gpr_and_lmul1_and_2:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: addi sp, sp, -16
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: li a0, 3
+; NOMUL-NEXT: sd a0, 8(sp)
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: addi sp, sp, 16
+; NOMUL-NEXT: ret
%x1 = alloca i64
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -396,15 +499,34 @@ define void @lmul_1_2_4_8_x2_1() nounwind {
}
define void @masks() nounwind {
-; CHECK-LABEL: masks:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: masks:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a0, a0, 2
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: masks:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add sp, a0, sp
+; ZBA-NEXT: ret
+;
+; NOMUL-LABEL: masks:
+; NOMUL: # %bb.0:
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: sub sp, sp, a0
+; NOMUL-NEXT: csrr a0, vlenb
+; NOMUL-NEXT: slli a0, a0, 2
+; NOMUL-NEXT: add sp, sp, a0
+; NOMUL-NEXT: ret
%v1 = alloca <vscale x 1 x i1>
%v2 = alloca <vscale x 2 x i1>
%v4 = alloca <vscale x 4 x i1>
|
You can test this locally with the following command: git-clang-format --diff f5cf98c02655de50401f6547ea181efed6a4c1f1 646e509c90215405453a78c578d240590cab518a -- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index ce8fdf9384..11d0368cef 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -207,8 +207,9 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
(NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
- unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
- (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
+ unsigned Opc = NumOfVReg == 2
+ ? RISCV::SH1ADD
+ : (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
.addReg(ScratchReg, RegState::Kill)
.addReg(SrcReg, getKillRegState(KillSrcReg))
@@ -216,7 +217,8 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
} else {
TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
- .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
.setMIFlag(Flag);
}
SrcReg = DestReg;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
If we need to multiply VLENB by 2, 4, or 8 and add it to the stack pointer, we can do so with a shNadd instead of separate shift and add instructions.