-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Optimize VRELOAD/VSPILL lowering if VLEN is known. #74421
Conversation
Instead of using VLENB and a shift, load (VLEN/8)*LMUL directly into a register. We could go further and use ADDI, but that would be more intrusive to the code structure. My primary goal is to remove the read of VLENB which might be expensive if it's not optimized in hardware.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesInstead of using VLENB and a shift, load (VLEN/8)*LMUL directly into a register. We could go further and use ADDI, but that would be more intrusive to the code structure. My primary goal is to remove the read of VLENB which might be expensive if it's not optimized in hardware. Full diff: https://github.com/llvm/llvm-project/pull/74421.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 475ccc01df1fa..a3c19115bd317 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -299,12 +299,20 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register SrcReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
@@ -368,12 +376,20 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
"Unexpected subreg numbering");
Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
+ // Optimize for constant VLEN.
+ const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
+ if (STI.getRealMinVLen() == STI.getRealMaxVLen()) {
+ const int64_t VLENB = STI.getRealMinVLen() / 8;
+ int64_t Offset = VLENB * LMUL;
+ STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
+ } else {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
+ uint32_t ShiftAmount = Log2_32(LMUL);
+ if (ShiftAmount != 0)
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
+ .addReg(VL)
+ .addImm(ShiftAmount);
+ }
Register DestReg = II->getOperand(0).getReg();
Register Base = II->getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index a8a165e549296..407c782d3377a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck --check-prefix=SPILL-O0 %s
; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+m -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
@@ -56,6 +58,28 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, ptr %base, i32 %vl)
call void asm sideeffect "",
@@ -116,6 +140,28 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, ptr %base, i32 %vl)
call void asm sideeffect "",
@@ -179,6 +225,28 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -64
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i32 %vl)
call void asm sideeffect "",
@@ -242,6 +310,28 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -128
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 64
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 64
+; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, ptr %base, i32 %vl)
call void asm sideeffect "",
@@ -314,6 +404,32 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -96
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i32 %vl)
call void asm sideeffect "",
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index a3c0ed47e113a..1c1544b4efa0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck --check-prefix=SPILL-O0 %s
; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+m -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
@@ -56,6 +58,28 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef, ptr %base, i64 %vl)
call void asm sideeffect "",
@@ -116,6 +140,28 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 16
+; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, ptr %base, i64 %vl)
call void asm sideeffect "",
@@ -179,6 +225,28 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -64
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl)
call void asm sideeffect "",
@@ -242,6 +310,28 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -128
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 64
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 64
+; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, ptr %base, i64 %vl)
call void asm sideeffect "",
@@ -314,6 +404,32 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O2-VLEN128-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2-VLEN128: # %bb.0: # %entry
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -16
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -96
+; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: #APP
+; SPILL-O2-VLEN128-NEXT: #NO_APP
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
+; SPILL-O2-VLEN128-NEXT: li a1, 32
+; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
+; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
+; SPILL-O2-VLEN128-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl)
call void asm sideeffect "",
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not familiar with RISCVRegisterInfo.cpp but LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, I guess we have a few opportunities to optimize the frame-related code with a compile-time-constant VLEN, and it's a good start.
LGTM as well. |
Just curious if there's a known target where that's true? A read of a CSR holding a design-time constant being cheap seems like a reasonable assumption for the compiler to make. |
Sadly it's been missed as an optimization in the first releases of SiFive x280, p470 and p670. |
Instead of using VLENB and a shift, load (VLEN/8)*LMUL directly into a register. We could go further and use ADDI, but that would be more intrusive to the code structure.
My primary goal is to remove the read of VLENB which might be expensive if it's not optimized in hardware.