Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Use shNadd for scalable stack offsets #88062

Merged

Conversation

preames
Copy link
Collaborator

@preames preames commented Apr 8, 2024

If we need to multiply VLENB by 2, 4, or 8 and add the result to the stack pointer, we can do so with a single shNadd instruction (when Zba is available) instead of separate shift and add instructions.

If we need to multiply VLENB by 2, 4, or 8 and add it to the stack
pointer, we can do so with a shNadd instead of separate shift and
add instructions.
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 8, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

If we need to multiply VLENB by 2, 4, or 8 and add it to the stack pointer, we can do so with a shNadd instead of separate shift and add instructions.


Full diff: https://github.com/llvm/llvm-project/pull/88062.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp (+15-4)
  • (modified) llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll (+180-58)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 713260b090e9cf..ce8fdf9384b8e2 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -204,10 +204,21 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
     uint32_t NumOfVReg = ScalableValue / 8;
     BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
         .setMIFlag(Flag);
-    TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
-    BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
-      .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
-      .setMIFlag(Flag);
+
+    if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
+        (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
+      unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
+        (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
+      BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
+          .addReg(ScratchReg, RegState::Kill)
+          .addReg(SrcReg, getKillRegState(KillSrcReg))
+          .setMIFlag(Flag);
+    } else {
+      TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
+      BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
+          .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
+          .setMIFlag(Flag);
+    }
     SrcReg = DestReg;
     KillSrcReg = true;
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 466ab085b266b4..90794820ddd849 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -7,29 +7,67 @@
 ; RUN:    | FileCheck %s --check-prefixes=CHECK,NOMUL
 
 define void @lmul1() nounwind {
-; CHECK-LABEL: lmul1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: lmul1:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 1
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul1:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 1
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh1add sp, a0, sp
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul1:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    ret
   %v = alloca <vscale x 1 x i64>
   ret void
 }
 
 define void @lmul2() nounwind {
-; CHECK-LABEL: lmul2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 1
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: lmul2:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 1
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul2:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 1
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh1add sp, a0, sp
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul2:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    ret
   %v = alloca <vscale x 2 x i64>
   ret void
 }
@@ -75,15 +113,34 @@ define void @lmul8() nounwind {
 }
 
 define void @lmul1_and_2() nounwind {
-; CHECK-LABEL: lmul1_and_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: lmul1_and_2:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul1_and_2:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 2
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh2add sp, a0, sp
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul1_and_2:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    ret
   %v1 = alloca <vscale x 1 x i64>
   %v2 = alloca <vscale x 2 x i64>
   ret void
@@ -132,15 +189,34 @@ define void @lmul1_and_4() nounwind {
 }
 
 define void @lmul2_and_1() nounwind {
-; CHECK-LABEL: lmul2_and_1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: lmul2_and_1:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul2_and_1:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 2
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh2add sp, a0, sp
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul2_and_1:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    ret
   %v1 = alloca <vscale x 2 x i64>
   %v2 = alloca <vscale x 1 x i64>
   ret void
@@ -273,19 +349,46 @@ define void @lmul4_and_2_x2_1() nounwind {
 
 
 define void @gpr_and_lmul1_and_2() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    li a0, 3
-; CHECK-NEXT:    sd a0, 8(sp)
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: gpr_and_lmul1_and_2:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    addi sp, sp, -16
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    li a0, 3
+; NOZBA-NEXT:    sd a0, 8(sp)
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    addi sp, sp, 16
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_2:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    addi sp, sp, -16
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 2
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    li a0, 3
+; ZBA-NEXT:    sd a0, 8(sp)
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh2add sp, a0, sp
+; ZBA-NEXT:    addi sp, sp, 16
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: gpr_and_lmul1_and_2:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    addi sp, sp, -16
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    li a0, 3
+; NOMUL-NEXT:    sd a0, 8(sp)
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    addi sp, sp, 16
+; NOMUL-NEXT:    ret
   %x1 = alloca i64
   %v1 = alloca <vscale x 1 x i64>
   %v2 = alloca <vscale x 2 x i64>
@@ -396,15 +499,34 @@ define void @lmul_1_2_4_8_x2_1() nounwind {
 }
 
 define void @masks() nounwind {
-; CHECK-LABEL: masks:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    sub sp, sp, a0
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 2
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ret
+; NOZBA-LABEL: masks:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    slli a0, a0, 2
+; NOZBA-NEXT:    add sp, sp, a0
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: masks:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 2
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    sh2add sp, a0, sp
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: masks:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 2
+; NOMUL-NEXT:    add sp, sp, a0
+; NOMUL-NEXT:    ret
   %v1 = alloca <vscale x 1 x i1>
   %v2 = alloca <vscale x 2 x i1>
   %v4 = alloca <vscale x 4 x i1>

Copy link

github-actions bot commented Apr 8, 2024

⚠️ The C/C++ code formatter, clang-format, found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff f5cf98c02655de50401f6547ea181efed6a4c1f1 646e509c90215405453a78c578d240590cab518a -- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index ce8fdf9384..11d0368cef 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -207,8 +207,9 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
 
     if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
         (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
-      unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
-        (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
+      unsigned Opc = NumOfVReg == 2
+                         ? RISCV::SH1ADD
+                         : (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
       BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
           .addReg(ScratchReg, RegState::Kill)
           .addReg(SrcReg, getKillRegState(KillSrcReg))
@@ -216,7 +217,8 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
     } else {
       TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
       BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
-          .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
+          .addReg(SrcReg)
+          .addReg(ScratchReg, RegState::Kill)
           .setMIFlag(Flag);
     }
     SrcReg = DestReg;

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor

@lukel97 lukel97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@preames preames merged commit e47fd09 into llvm:main Apr 9, 2024
5 of 6 checks passed
@preames preames deleted the pr-riscv-shnadd-for-scalable-stack-offset branch April 9, 2024 14:29
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants