From eae68fc49a0f29ab63e9e8f65c995ac1ac0fb0a2 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Tue, 12 Aug 2025 20:58:09 +0800 Subject: [PATCH 01/10] [RISCV] Optimize the spill/reload of segment registers The simplest way would be: 1. Save `vtype` to a scalar register. 2. Insert a `vsetvli`. 3. Use segment load/store. 4. Restore `vtype` via `vsetvl`. But `vsetvl` is usually slow, so this PR does not take that approach. Instead, we use wider whole-register load/store instructions when the register encoding is suitably aligned. We applied the same optimization to COPY in https://github.com/llvm/llvm-project/pull/84455. We noticed the suboptimal spill/reload code while porting some video codec kernels written with RVV intrinsics. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 14 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 227 ++++++++++-------- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 3 + .../early-clobber-tied-def-subreg-liveness.ll | 20 +- ...regalloc-last-chance-recoloring-failure.ll | 24 +- .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 132 +++------- .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 132 +++------- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 33 +-- 8 files changed, 216 insertions(+), 369 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 085064eee896a..7b4a1de167695 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RegClass) const { - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const RISCVRegisterInfo *TRI = STI.getRegisterInfo(); RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags); unsigned NF = RISCVRI::getNF(RegClass->TSFlags); @@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector( return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1}; }; - auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass, - uint16_t Encoding) { - MCRegister Reg = RISCV::V0 + Encoding; - if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1) - return Reg; - return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); - }; + while (I != NumRegs) { // For non-segment copying, we only do this once as the registers are always // aligned. @@ -470,9 +464,9 @@ void RISCVInstrInfo::copyPhysRegVector( // Emit actual copying. // For reversed copying, the encoding should be decreased. - MCRegister ActualSrcReg = FindRegWithEncoding( + MCRegister ActualSrcReg = TRI->findVRegWithEncoding( RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding); - MCRegister ActualDstReg = FindRegWithEncoding( + MCRegister ActualDstReg = TRI->findVRegWithEncoding( RegClass, ReversedCopy ?
(DstEncoding - NumCopied + 1) : DstEncoding); auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 8a3c8e2a1c1cf..34f69c1bb1883 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -389,6 +389,22 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, .setMIFlag(Flag); } +static std::tuple +getSpillReloadInfo(unsigned Idx, unsigned Total, uint16_t RegEncoding, + bool IsSpill) { + if (Idx + 8 <= Total && RegEncoding % 8 == 0) + return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, + IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V}; + if (Idx + 4 <= Total && RegEncoding % 4 == 0) + return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, + IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V}; + if (Idx + 2 <= Total && RegEncoding % 2 == 0) + return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, + IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V}; + return {RISCVVType::LMUL_1, RISCV::VRRegClass, + IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V}; +} + // Split a VSPILLx_Mx pseudo into multiple whole register stores separated by // LMUL*VLENB bytes. void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { @@ -403,47 +419,11 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode()); unsigned NF = ZvlssegInfo->first; unsigned LMUL = ZvlssegInfo->second; - assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations."); - unsigned Opcode, SubRegIdx; - switch (LMUL) { - default: - llvm_unreachable("LMUL must be 1, 2, or 4."); - case 1: - Opcode = RISCV::VS1R_V; - SubRegIdx = RISCV::sub_vrm1_0; - break; - case 2: - Opcode = RISCV::VS2R_V; - SubRegIdx = RISCV::sub_vrm2_0; - break; - case 4: - Opcode = RISCV::VS4R_V; - SubRegIdx = RISCV::sub_vrm4_0; - break; - } - static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, - "Unexpected subreg numbering"); - - Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Optimize for constant VLEN. 
- if (auto VLEN = STI.getRealVLen()) { - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * LMUL; - STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); - } else { - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); - } + unsigned NumRegs = NF * LMUL; + assert(NumRegs <= 8 && "Invalid NF/LMUL combinations."); Register SrcReg = II->getOperand(0).getReg(); + uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); Register Base = II->getOperand(1).getReg(); bool IsBaseKill = II->getOperand(1).isKill(); Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); @@ -451,23 +431,53 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { auto *OldMMO = *(II->memoperands_begin()); LocationSize OldLoc = OldMMO->getSize(); assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF); - auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize); - for (unsigned I = 0; I < NF; ++I) { - // Adding implicit-use of super register to describe we are using part of - // super register, that prevents machine verifier complaining when part of - // subreg is undef, see comment in MachineVerifier::checkLiveness for more - // detail. - BuildMI(MBB, II, DL, TII->get(Opcode)) - .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I)) - .addReg(Base, getKillRegState(I == NF - 1)) - .addMemOperand(NewMMO) - .addReg(SrcReg, RegState::Implicit); - if (I != NF - 1) + TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs); + + Register VLENB = 0; + unsigned PreSavedNum = 0; + unsigned I = 0; + while (I != NumRegs) { + auto [LMulSaved, RegClass, Opcode] = + getSpillReloadInfo(I, NumRegs, SrcEncoding, true); + auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved); + if (PreSavedNum) { + Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + if (auto VLEN = STI.getRealVLen()) { + const int64_t VLENB = *VLEN / 8; + int64_t Offset = VLENB * PreSavedNum; + STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); + } else { + if (!VLENB) { + VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); + } + uint32_t ShiftAmount = Log2_32(PreSavedNum); + if (ShiftAmount == 0) + Step = VLENB; + else + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) + .addReg(VLENB) + .addImm(ShiftAmount); + } + BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(VL, getKillRegState(I == NF - 2)); - Base = NewBase; + .addReg(Step, getKillRegState(true)); + Base = NewBase; + } + + MCRegister ActualSrcReg = findVRegWithEncoding(RegClass, SrcEncoding); + + BuildMI(MBB, II, DL, TII->get(Opcode)) + .addReg(ActualSrcReg) + .addReg(Base, getKillRegState(I + NumSaved == NumRegs)) + .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), + NewSize * NumSaved)) + .addReg(SrcReg, RegState::Implicit); + + PreSavedNum = NumSaved; + SrcEncoding += NumSaved; + I += NumSaved; } II->eraseFromParent(); } @@ -486,65 +496,63 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode()); unsigned NF = ZvlssegInfo->first; unsigned LMUL = ZvlssegInfo->second; - assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations."); - unsigned Opcode, SubRegIdx; - switch 
(LMUL) { - default: - llvm_unreachable("LMUL must be 1, 2, or 4."); - case 1: - Opcode = RISCV::VL1RE8_V; - SubRegIdx = RISCV::sub_vrm1_0; - break; - case 2: - Opcode = RISCV::VL2RE8_V; - SubRegIdx = RISCV::sub_vrm2_0; - break; - case 4: - Opcode = RISCV::VL4RE8_V; - SubRegIdx = RISCV::sub_vrm4_0; - break; - } - static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, - "Unexpected subreg numbering"); - static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, - "Unexpected subreg numbering"); - - Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - // Optimize for constant VLEN. - if (auto VLEN = STI.getRealVLen()) { - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * LMUL; - STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset); - } else { - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL); - uint32_t ShiftAmount = Log2_32(LMUL); - if (ShiftAmount != 0) - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL) - .addReg(VL) - .addImm(ShiftAmount); - } + unsigned NumRegs = NF * LMUL; + assert(NumRegs <= 8 && "Invalid NF/LMUL combinations."); Register DestReg = II->getOperand(0).getReg(); + uint16_t DestEncoding = TRI->getEncodingValue(DestReg); Register Base = II->getOperand(1).getReg(); bool IsBaseKill = II->getOperand(1).isKill(); Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto *OldMMO = *(II->memoperands_begin()); LocationSize OldLoc = OldMMO->getSize(); assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF); - auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize); - for (unsigned I = 0; I < NF; ++I) { - BuildMI(MBB, II, DL, TII->get(Opcode), - TRI->getSubReg(DestReg, SubRegIdx + I)) - .addReg(Base, getKillRegState(I == NF - 1)) - .addMemOperand(NewMMO); - if (I != NF - 1) + TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs); + + Register VLENB = 0; + unsigned PreReloadedNum = 0; + unsigned I = 0; + while (I != NumRegs) { + auto [LMulReloaded, RegClass, Opcode] = + getSpillReloadInfo(I, NumRegs, DestEncoding, false); + auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded); + if (PreReloadedNum) { + Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + if (auto VLEN = STI.getRealVLen()) { + const int64_t VLENB = *VLEN / 8; + int64_t Offset = VLENB * PreReloadedNum; + STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); + } else { + if (!VLENB) { + VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); + } + uint32_t ShiftAmount = Log2_32(PreReloadedNum); + if (ShiftAmount == 0) + Step = VLENB; + else + BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) + .addReg(VLENB) + .addImm(ShiftAmount); + } + BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(VL, getKillRegState(I == NF - 2)); - Base = NewBase; + .addReg(Step, getKillRegState(true)); + Base = NewBase; + } + + MCRegister ActualDestReg = findVRegWithEncoding(RegClass, DestEncoding); + + BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg) + .addReg(Base, getKillRegState(I + NumReloaded == NumRegs)) + .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), + NewSize * NumReloaded)); + + PreReloadedNum = NumReloaded; + DestEncoding += NumReloaded; + I += NumReloaded; } II->eraseFromParent(); } @@ -635,9 +643,7 @@ bool 
RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Handle spill/fill of synthetic register classes for segment operations to - // ensure correctness in the edge case one gets spilled. There are many - // possible optimizations here, but given the extreme rarity of such spills, - // we prefer simplicity of implementation for now. + // ensure correctness in the edge case one gets spilled. switch (MI.getOpcode()) { case RISCV::PseudoVSPILL2_M1: case RISCV::PseudoVSPILL2_M2: @@ -1052,3 +1058,12 @@ bool RISCVRegisterInfo::getRegAllocationHints( return BaseImplRetVal; } + +Register +RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass, + uint16_t Encoding) const { + MCRegister Reg = RISCV::V0 + Encoding; + if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1) + return Reg; + return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass); +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index b368399e2ad14..ffb4f84afb9a3 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + Register findVRegWithEncoding(const TargetRegisterClass &RegClass, + uint16_t Encoding) const; + static bool isVRRegClass(const TargetRegisterClass *RC) { return RISCVRI::isVRegClass(RC->TSFlags) && RISCVRI::getNF(RC->TSFlags) == 1; diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 0afdcdccd9246..6a7c73672bf6c 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -40,15 +40,7 @@ define void @_Z3foov() { ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) ; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40) ; CHECK-NEXT: #APP @@ -59,15 +51,7 @@ define void @_Z3foov() { ; CHECK-NEXT: addi a0, a0, 928 ; CHECK-NEXT: vmsbc.vx v0, v8, a0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll index 878b180e34c01..f3c88923c15e2 100644 --- 
a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -32,11 +32,7 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; CHECK-NEXT: li s0, 36 ; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v16, v8, v12, v0.t @@ -47,11 +43,7 @@ define void @last_chance_recoloring_failure() { ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma @@ -92,11 +84,7 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0 ; SUBREGLIVENESS-NEXT: addi a0, a0, 16 -; SUBREGLIVENESS-NEXT: csrr a1, vlenb -; SUBREGLIVENESS-NEXT: slli a1, a1, 2 -; SUBREGLIVENESS-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill -; SUBREGLIVENESS-NEXT: add a0, a0, a1 -; SUBREGLIVENESS-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill +; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill ; SUBREGLIVENESS-NEXT: li s0, 36 ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma ; SUBREGLIVENESS-NEXT: vfwadd.vv v16, v8, v12, v0.t @@ -107,11 +95,7 @@ define void @last_chance_recoloring_failure() { ; SUBREGLIVENESS-NEXT: slli a0, a0, 3 ; SUBREGLIVENESS-NEXT: add a0, sp, a0 ; SUBREGLIVENESS-NEXT: addi a0, a0, 16 -; SUBREGLIVENESS-NEXT: csrr a1, vlenb -; SUBREGLIVENESS-NEXT: slli a1, a1, 2 -; SUBREGLIVENESS-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload -; SUBREGLIVENESS-NEXT: add a0, a0, a1 -; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload +; SUBREGLIVENESS-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload ; SUBREGLIVENESS-NEXT: addi a0, sp, 16 ; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload ; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index 663bb1fc15517..d69a166b04080 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -41,14 +41,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a1, vlenb ; 
SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -64,15 +61,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 -; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 @@ -108,14 +101,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -161,14 +151,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -184,15 +171,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 -; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: 
add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 @@ -228,14 +211,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -283,17 +263,12 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -309,15 +284,11 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 -; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 @@ -353,17 +324,12 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; 
SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma @@ -411,17 +377,12 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 2 -; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 2 -; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -437,15 +398,11 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 64 -; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 @@ -481,17 +438,12 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 -; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 -; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma @@ -540,23 +492,19 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg3e32.v v8, (a0) ; 
SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: slli a1, a1, 2 ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: li a1, 6 ; SPILL-O2-NEXT: mul a0, a0, a1 @@ -571,21 +519,17 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 -; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret @@ -621,23 +565,19 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; 
SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; SPILL-O2-VSETVLI-NEXT: li a1, 6 ; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index dc0e8fd987c6d..610443845389a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -41,14 +41,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -64,15 +61,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 -; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 @@ -108,14 +101,11 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -161,14 +151,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: vs1r.v 
v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -184,15 +171,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 -; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 16 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 @@ -228,14 +211,11 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -283,17 +263,12 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -309,15 +284,11 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 -; 
SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 @@ -353,17 +324,12 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma @@ -411,17 +377,12 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 2 -; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 2 -; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb @@ -437,15 +398,11 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 64 -; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP -; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload 
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 @@ -481,17 +438,12 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 -; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 -; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP -; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 -; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma @@ -540,23 +492,19 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-NEXT: slli a1, a1, 2 ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: csrr a1, vlenb ; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-NEXT: add a0, a0, a1 -; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: li a1, 6 ; SPILL-O2-NEXT: mul a0, a0, a1 @@ -571,21 +519,17 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 -; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill +; SPILL-O2-VLEN128-NEXT: li a1, 64 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 ; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VLEN128-NEXT: #APP ; SPILL-O2-VLEN128-NEXT: #NO_APP ; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16 -; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VLEN128-NEXT: li a1, 32 ; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-VLEN128-NEXT: add a0, a0, a1 -; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload 
+; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret @@ -621,23 +565,19 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb -; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 ; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill ; SPILL-O2-VSETVLI-NEXT: #APP ; SPILL-O2-VSETVLI-NEXT: #NO_APP ; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb ; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload -; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 -; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload ; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; SPILL-O2-VSETVLI-NEXT: li a1, 6 ; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index 055d9ed630718..6b1e5e08c4b38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -30,35 +30,22 @@ body: | ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $x11 = ADDI $x2, 16 + ; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0, align 8) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: VS1R_V $v0, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: VS1R_V $v2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: VS1R_V $v3, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: VS1R_V $v4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: VS1R_V $v5, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) + ; CHECK-NEXT: $x13 = SLLI $x12, 2 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0, align 8) + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: VS1R_V $v6, killed $x11, 
implicit $v0_v1_v2_v3_v4_v5_v6 :: (store () into %stack.0) ; CHECK-NEXT: $x11 = ADDI $x2, 16 - ; CHECK-NEXT: $x12 = PseudoReadVLENB ; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load () from %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: $v8 = VL1RE8_V $x11 :: (load () from %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: $v9 = VL1RE8_V $x11 :: (load () from %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: $v10 = VL1RE8_V $x11 :: (load () from %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: $v11 = VL1RE8_V $x11 :: (load () from %stack.0) - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 - ; CHECK-NEXT: $v12 = VL1RE8_V $x11 :: (load () from %stack.0) + ; CHECK-NEXT: $x12 = PseudoReadVLENB + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) + ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 - ; CHECK-NEXT: $v13 = VL1RE8_V killed $x11 :: (load () from %stack.0) + ; CHECK-NEXT: $v12m2 = VL2RE8_V killed $x11 :: (load () from %stack.0, align 8) ; CHECK-NEXT: VS1R_V killed $v8, killed renamable $x10 ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB ; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3 From 8eca612c46432b265a297dab73b1f141635d1cce Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 13 Aug 2025 12:10:14 +0800 Subject: [PATCH 02/10] Add tests for 124/241 sequences --- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 65 +++++++++++++++++-- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index 6b1e5e08c4b38..bd248bac717e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -2,15 +2,15 @@ # RUN: llc -mtriple=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - define void @zvlsseg_spill(ptr %base, i64 %vl) { + define void @zvlsseg_spill_0(ptr %base, i64 %vl) { + ret void + } + define void @zvlsseg_spill_1(ptr %base, i64 %vl) { ret void } ... --- -name: zvlsseg_spill +name: zvlsseg_spill_0 tracksRegLiveness: true stack: - { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector } @@ -18,7 +18,7 @@ body: | bb.0: liveins: $x10, $x11 - ; CHECK-LABEL: name: zvlsseg_spill + ; CHECK-LABEL: name: zvlsseg_spill_0 ; CHECK: liveins: $x10, $x11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16 @@ -62,3 +62,56 @@ body: | VS1R_V killed $v8, %0:gpr PseudoRET ... 
+ +--- +name: zvlsseg_spill_1 +tracksRegLiveness: true +stack: + - { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector } +body: | + bb.0: + liveins: $x10, $x11 + ; CHECK-LABEL: name: zvlsseg_spill_1 + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB + ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3 + ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: $v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $x11 = ADDI $x2, 16 + ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0) + ; CHECK-NEXT: $x12 = PseudoReadVLENB + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0, align 8) + ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: VS4R_V $v4m4, killed $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0, align 8) + ; CHECK-NEXT: $x11 = ADDI $x2, 16 + ; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load () from %stack.0, align 8) + ; CHECK-NEXT: $x12 = PseudoReadVLENB + ; CHECK-NEXT: $x13 = SLLI $x12, 1 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) + ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load () from %stack.0) + ; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10 + ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB + ; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3 + ; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $x2, 16 + ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 16 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gprnox0 = COPY $x11 + $v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, %0, %1, 6, 0 + PseudoVSPILL7_M1 killed renamable $v1_v2_v3_v4_v5_v6_v7, %stack.0 :: (store () into %stack.0, align 8) + renamable $v10_v11_v12_v13_v14_v15_v16 = PseudoVRELOAD7_M1 %stack.0 :: (load () from %stack.0, align 8) + VS1R_V killed $v10, %0:gpr + PseudoRET +... From 096e51caf0964e280f1554c056359c86c9f93011 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 13 Aug 2025 12:23:34 +0800 Subject: [PATCH 03/10] First round of addressing comments 1. Add `NumRemaining`. 2. Rename `NewSize` to `VRegSize`. 3. Add argument comments. 4. Don't create `Step` for `ShiftAmount==0` case. 
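To make the chunking rule concrete, here is a minimal standalone sketch of the greedy decomposition that `getSpillReloadInfo` performs (the `chunkWidth` helper and the driver below are hypothetical illustration code, not LLVM API):

```cpp
#include <cstdio>
#include <initializer_list>

static unsigned chunkWidth(unsigned numRemaining, unsigned encoding) {
  // Pick the widest whole-register access (8/4/2/1 registers) that still
  // fits in the remaining registers and whose starting encoding is aligned
  // to that width, mirroring the NumRemaining/RegEncoding checks above.
  for (unsigned w : {8u, 4u, 2u, 1u})
    if (numRemaining >= w && encoding % w == 0)
      return w;
  return 1; // unreachable: width 1 always qualifies
}

int main() {
  for (unsigned start : {1u, 10u}) {
    unsigned enc = start, left = 7; // an NF=7 register group
    std::printf("v%u..v%u:", start, start + 6);
    while (left != 0) {
      unsigned w = chunkWidth(left, enc);
      std::printf(" %u", w);
      enc += w;
      left -= w;
    }
    std::printf("\n");
  }
  return 0;
}
```

This prints `v1..v7: 1 2 4` and `v10..v16: 2 4 1`, i.e. exactly the 124/241 sequences that the new `zvlsseg_spill_1` test above spills and reloads.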
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 35 ++++++++++++--------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 34f69c1bb1883..6b6c472536e90 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -390,15 +390,14 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, } static std::tuple -getSpillReloadInfo(unsigned Idx, unsigned Total, uint16_t RegEncoding, - bool IsSpill) { - if (Idx + 8 <= Total && RegEncoding % 8 == 0) +getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) { + if (NumRemaining >= 8 && RegEncoding % 8 == 0) return {RISCVVType::LMUL_8, RISCV::VRM8RegClass, IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V}; - if (Idx + 4 <= Total && RegEncoding % 4 == 0) + if (NumRemaining >= 4 && RegEncoding % 4 == 0) return {RISCVVType::LMUL_4, RISCV::VRM4RegClass, IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V}; - if (Idx + 2 <= Total && RegEncoding % 2 == 0) + if (NumRemaining >= 2 && RegEncoding % 2 == 0) return {RISCVVType::LMUL_2, RISCV::VRM2RegClass, IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V}; return {RISCVVType::LMUL_1, RISCV::VRRegClass, @@ -431,20 +430,21 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { auto *OldMMO = *(II->memoperands_begin()); LocationSize OldLoc = OldMMO->getSize(); assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs); + TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); Register VLENB = 0; unsigned PreSavedNum = 0; unsigned I = 0; while (I != NumRegs) { auto [LMulSaved, RegClass, Opcode] = - getSpillReloadInfo(I, NumRegs, SrcEncoding, true); + getSpillReloadInfo(NumRegs - I, SrcEncoding, /*IsSpill=*/true); auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved); if (PreSavedNum) { - Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + Register Step; if (auto VLEN = STI.getRealVLen()) { const int64_t VLENB = *VLEN / 8; int64_t Offset = VLENB * PreSavedNum; + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); } else { if (!VLENB) { @@ -454,10 +454,12 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { uint32_t ShiftAmount = Log2_32(PreSavedNum); if (ShiftAmount == 0) Step = VLENB; - else + else { + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) .addReg(VLENB) .addImm(ShiftAmount); + } } BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) @@ -472,7 +474,7 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { .addReg(ActualSrcReg) .addReg(Base, getKillRegState(I + NumSaved == NumRegs)) .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), - NewSize * NumSaved)) + VRegSize * NumSaved)) .addReg(SrcReg, RegState::Implicit); PreSavedNum = NumSaved; @@ -508,18 +510,19 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { auto *OldMMO = *(II->memoperands_begin()); LocationSize OldLoc = OldMMO->getSize(); assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs); + TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); Register VLENB = 0; unsigned PreReloadedNum = 0; unsigned I = 0; while (I != NumRegs) { auto [LMulReloaded, RegClass, 
Opcode] = - getSpillReloadInfo(I, NumRegs, DestEncoding, false); + getSpillReloadInfo(NumRegs - I, DestEncoding, /*IsSpill=*/false); auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded); if (PreReloadedNum) { - Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); + Register Step; if (auto VLEN = STI.getRealVLen()) { + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); const int64_t VLENB = *VLEN / 8; int64_t Offset = VLENB * PreReloadedNum; STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); @@ -531,10 +534,12 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { uint32_t ShiftAmount = Log2_32(PreReloadedNum); if (ShiftAmount == 0) Step = VLENB; - else + else { + Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) .addReg(VLENB) .addImm(ShiftAmount); + } } BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) @@ -548,7 +553,7 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg) .addReg(Base, getKillRegState(I + NumReloaded == NumRegs)) .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), - NewSize * NumReloaded)); + VRegSize * NumReloaded)); PreReloadedNum = NumReloaded; DestEncoding += NumReloaded; From 2c0aad0abeabaf4b72bb93aa1b73b2e6fdc48101 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 13 Aug 2025 14:16:17 +0800 Subject: [PATCH 04/10] Fold spill/reload implementations into one function --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 137 +++++--------------- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 4 +- 2 files changed, 37 insertions(+), 104 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 6b6c472536e90..d055a83a1e79f 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -404,9 +404,10 @@ getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) { IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V}; } -// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by -// LMUL*VLENB bytes. -void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { +// Split a VSPILLx_Mx/VRELOADx_Mx pseudo into multiple whole register +// stores/loads separated by LMUL*VLENB bytes.
+void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, + bool IsSpill) const { DebugLoc DL = II->getDebugLoc(); MachineBasicBlock &MBB = *II->getParent(); MachineFunction &MF = *MBB.getParent(); @@ -421,8 +422,8 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { unsigned NumRegs = NF * LMUL; assert(NumRegs <= 8 && "Invalid NF/LMUL combinations."); - Register SrcReg = II->getOperand(0).getReg(); - uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); + Register Reg = II->getOperand(0).getReg(); + uint16_t RegEncoding = TRI->getEncodingValue(Reg); Register Base = II->getOperand(1).getReg(); bool IsBaseKill = II->getOperand(1).isKill(); Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); @@ -433,17 +434,18 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); Register VLENB = 0; - unsigned PreSavedNum = 0; + unsigned PreHandledNum = 0; unsigned I = 0; while (I != NumRegs) { - auto [LMulSaved, RegClass, Opcode] = - getSpillReloadInfo(NumRegs - I, SrcEncoding, /*IsSpill=*/true); - auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved); - if (PreSavedNum) { + auto [LMulHandled, RegClass, Opcode] = + getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill); + auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled); + if (PreHandledNum) { Register Step; + // Optimize for constant VLEN. if (auto VLEN = STI.getRealVLen()) { const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * PreSavedNum; + int64_t Offset = VLENB * PreHandledNum; Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); } else { @@ -451,7 +453,7 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); } - uint32_t ShiftAmount = Log2_32(PreSavedNum); + uint32_t ShiftAmount = Log2_32(PreHandledNum); if (ShiftAmount == 0) Step = VLENB; else { @@ -468,96 +470,27 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const { Base = NewBase; } - MCRegister ActualSrcReg = findVRegWithEncoding(RegClass, SrcEncoding); - - BuildMI(MBB, II, DL, TII->get(Opcode)) - .addReg(ActualSrcReg) - .addReg(Base, getKillRegState(I + NumSaved == NumRegs)) - .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), - VRegSize * NumSaved)) - .addReg(SrcReg, RegState::Implicit); - - PreSavedNum = NumSaved; - SrcEncoding += NumSaved; - I += NumSaved; - } - II->eraseFromParent(); -} - -// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by -// LMUL*VLENB bytes. 
-void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const { - DebugLoc DL = II->getDebugLoc(); - MachineBasicBlock &MBB = *II->getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const RISCVSubtarget &STI = MF.getSubtarget(); - const TargetInstrInfo *TII = STI.getInstrInfo(); - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - - auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode()); - unsigned NF = ZvlssegInfo->first; - unsigned LMUL = ZvlssegInfo->second; - unsigned NumRegs = NF * LMUL; - assert(NumRegs <= 8 && "Invalid NF/LMUL combinations."); - - Register DestReg = II->getOperand(0).getReg(); - uint16_t DestEncoding = TRI->getEncodingValue(DestReg); - Register Base = II->getOperand(1).getReg(); - bool IsBaseKill = II->getOperand(1).isKill(); - Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass); - - auto *OldMMO = *(II->memoperands_begin()); - LocationSize OldLoc = OldMMO->getSize(); - assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF)); - TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs); - - Register VLENB = 0; - unsigned PreReloadedNum = 0; - unsigned I = 0; - while (I != NumRegs) { - auto [LMulReloaded, RegClass, Opcode] = - getSpillReloadInfo(NumRegs - I, DestEncoding, /*IsSpill=*/false); - auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded); - if (PreReloadedNum) { - Register Step; - if (auto VLEN = STI.getRealVLen()) { - Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * PreReloadedNum; - STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); - } else { - if (!VLENB) { - VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB); - } - uint32_t ShiftAmount = Log2_32(PreReloadedNum); - if (ShiftAmount == 0) - Step = VLENB; - else { - Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) - .addReg(VLENB) - .addImm(ShiftAmount); - } - } - - BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) - .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(Step, getKillRegState(true)); - Base = NewBase; - } - - MCRegister ActualDestReg = findVRegWithEncoding(RegClass, DestEncoding); + MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding); + MachineInstrBuilder MI; + if (IsSpill) + MI = BuildMI(MBB, II, DL, TII->get(Opcode)).addReg(ActualReg); + else + MI = BuildMI(MBB, II, DL, TII->get(Opcode), ActualReg); - BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg) - .addReg(Base, getKillRegState(I + NumReloaded == NumRegs)) + MI.addReg(Base, getKillRegState(I + RegNumHandled == NumRegs)) .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), - VRegSize * NumReloaded)); - - PreReloadedNum = NumReloaded; - DestEncoding += NumReloaded; - I += NumReloaded; + VRegSize * RegNumHandled)); + + // Add an implicit use of the super register to indicate that we are using + // part of it. This prevents the machine verifier from complaining when + // part of the subreg is undef; see the comment in + // MachineVerifier::checkLiveness for more detail.
+ if (IsSpill) + MI.addReg(Reg, RegState::Implicit); + + PreHandledNum = RegNumHandled; + RegEncoding += RegNumHandled; + I += RegNumHandled; } II->eraseFromParent(); } @@ -661,7 +594,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case RISCV::PseudoVSPILL6_M1: case RISCV::PseudoVSPILL7_M1: case RISCV::PseudoVSPILL8_M1: - lowerVSPILL(II); + lowerSegmentSpillReload(II, /*IsSpill=*/true); return true; case RISCV::PseudoVRELOAD2_M1: case RISCV::PseudoVRELOAD2_M2: @@ -674,7 +607,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case RISCV::PseudoVRELOAD6_M1: case RISCV::PseudoVRELOAD7_M1: case RISCV::PseudoVRELOAD8_M1: - lowerVRELOAD(II); + lowerSegmentSpillReload(II, /*IsSpill=*/false); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index ffb4f84afb9a3..2810139bf52ea 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override; - void lowerVSPILL(MachineBasicBlock::iterator II) const; - void lowerVRELOAD(MachineBasicBlock::iterator II) const; + void lowerSegmentSpillReload(MachineBasicBlock::iterator II, + bool IsSpill) const; Register getFrameRegister(const MachineFunction &MF) const override; From dd3126eea923224f6636ed24de68cd3c52c30edc Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 13 Aug 2025 14:24:34 +0800 Subject: [PATCH 05/10] Inline const VLENB --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index d055a83a1e79f..a1582912bd258 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -444,8 +444,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, Register Step; // Optimize for constant VLEN. 
if (auto VLEN = STI.getRealVLen()) { - const int64_t VLENB = *VLEN / 8; - int64_t Offset = VLENB * PreHandledNum; + int64_t Offset = *VLEN / 8 * PreHandledNum; Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset); } else { From 00d9d6c9cf1ee781117e9b12e941dda97605962f Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 13 Aug 2025 14:25:06 +0800 Subject: [PATCH 06/10] Set IsKill for last step reg --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 2 +- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index a1582912bd258..62771e3306177 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -465,7 +465,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(Step, getKillRegState(true)); + .addReg(Step, getKillRegState(I + RegNumHandled == NumRegs)); Base = NewBase; } diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index bd248bac717e8..dd9960d17af43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -41,7 +41,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load () from %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 @@ -85,7 +85,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 From bf31a1ec5c6f925559a0058234331efbc13762fb Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Thu, 14 Aug 2025 19:46:57 +0800 Subject: [PATCH 07/10] Update IsKill state --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 5 +++-- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 62771e3306177..2740dbf4f5d45 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -458,14 +458,15 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, else { Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) - .addReg(VLENB) + .addReg(VLENB, getKillRegState(I + RegNumHandled == NumRegs)) .addImm(ShiftAmount); } } BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(Step, getKillRegState(I + RegNumHandled == NumRegs)); + .addReg(Step, getKillRegState(Step != VLENB || + I + RegNumHandled == NumRegs)); Base = NewBase; } diff --git 
a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index dd9960d17af43..bd248bac717e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -41,7 +41,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load () from %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 @@ -85,7 +85,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 ; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 From 98a0a309c8c1aea43b824b96b4360faf24fa0864 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 18 Aug 2025 15:25:51 +0800 Subject: [PATCH 08/10] Fix test --- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index bd248bac717e8..dd9960d17af43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -41,7 +41,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load () from %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load () from %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 2 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 @@ -85,7 +85,7 @@ body: | ; CHECK-NEXT: $x11 = ADDI $x2, 16 ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0) ; CHECK-NEXT: $x12 = PseudoReadVLENB - ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 + ; CHECK-NEXT: $x11 = ADD killed $x11, $x12 ; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store () into %stack.0, align 8) ; CHECK-NEXT: $x12 = SLLI killed $x12, 1 ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12 From 91fba4cf65eb85ab161c1eaefb7c7a641ce2c381 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Tue, 19 Aug 2025 12:14:02 +0800 Subject: [PATCH 09/10] Combine BuildMIs --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 2740dbf4f5d45..b9d5b2ae3885c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -471,22 +471,19 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, } MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding); - MachineInstrBuilder MI; - if (IsSpill) - MI = BuildMI(MBB, II, DL, TII->get(Opcode)).addReg(ActualReg); - else - MI = BuildMI(MBB, II, DL, TII->get(Opcode), ActualReg); - - MI.addReg(Base, getKillRegState(I + RegNumHandled == NumRegs)) - .addMemOperand(MF.getMachineMemOperand(OldMMO, 
OldMMO->getOffset(), + VRegSize * RegNumHandled)); // Add an implicit use of the super register to indicate that we are using // part of it. This prevents the machine verifier from complaining when // part of the subreg is undef; see the comment in // MachineVerifier::checkLiveness for more detail. if (IsSpill) - MI.addReg(Reg, RegState::Implicit); + MIB.addReg(Reg, RegState::Implicit); PreHandledNum = RegNumHandled; RegEncoding += RegNumHandled; From 5c1e8bb352988de0d58f3b718b72266a668ab268 Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 20 Aug 2025 12:02:39 +0800 Subject: [PATCH 10/10] Add IsLast --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index b9d5b2ae3885c..440f6cfc39b54 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -440,6 +440,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, auto [LMulHandled, RegClass, Opcode] = getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill); auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled); + bool IsLast = I + RegNumHandled == NumRegs; if (PreHandledNum) { Register Step; // Optimize for constant VLEN. @@ -458,15 +459,14 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, else { Step = MRI.createVirtualRegister(&RISCV::GPRRegClass); BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step) - .addReg(VLENB, getKillRegState(I + RegNumHandled == NumRegs)) + .addReg(VLENB, getKillRegState(IsLast)) .addImm(ShiftAmount); } } BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase) .addReg(Base, getKillRegState(I != 0 || IsBaseKill)) - .addReg(Step, getKillRegState(Step != VLENB || - I + RegNumHandled == NumRegs)); + .addReg(Step, getKillRegState(Step != VLENB || IsLast)); Base = NewBase; } @@ -474,7 +474,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II, MachineInstrBuilder MIB = BuildMI(MBB, II, DL, TII->get(Opcode)) .addReg(ActualReg, getDefRegState(!IsSpill)) - .addReg(Base, getKillRegState(I + RegNumHandled == NumRegs)) + .addReg(Base, getKillRegState(IsLast)) .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), VRegSize * RegNumHandled));
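
The base-address bookkeeping that these later patches keep re-tuning kill flags for is simple arithmetic: between two whole-register accesses the base advances by PreHandledNum * VLENB bytes, folded to an immediate when VLEN is statically known (movImm) and otherwise derived from a single PseudoReadVLENB, shifted by log2(PreHandledNum) when the step is more than one VLENB. A minimal sketch with plain integers; the helper name nextBase and its parameters are illustrative, not from the patch.

  #include <cstdint>
  #include <cstdio>
  #include <optional>

  // Advance Base after PreHandledNum registers were spilled/reloaded.
  // RealVLenBits models STI.getRealVLen(): the known VLEN in bits, if any.
  static uint64_t nextBase(uint64_t Base, unsigned PreHandledNum,
                           std::optional<uint64_t> RealVLenBits,
                           uint64_t DynamicVlenb) {
    uint64_t Step;
    if (RealVLenBits) {
      // Constant VLEN: the whole offset folds to one immediate.
      Step = *RealVLenBits / 8 * PreHandledNum;
    } else {
      // Unknown VLEN: PreHandledNum is 1, 2, 4 or 8, so the step is the
      // VLENB value shifted left; a shift of 0 reuses VLENB directly.
      unsigned Shift = 0;
      while ((1u << Shift) < PreHandledNum)
        ++Shift;
      Step = DynamicVlenb << Shift;
    }
    return Base + Step;
  }

  int main() {
    // Reload of v10..v16 (the 2+4+1 split) with VLEN=128, i.e. VLENB=16:
    // base += 2*16 before the VL4RE8_V, base += 4*16 before the VL1RE8_V.
    uint64_t Base = 0x1000;
    Base = nextBase(Base, 2, std::nullopt, /*DynamicVlenb=*/16);
    Base = nextBase(Base, 4, std::nullopt, /*DynamicVlenb=*/16);
    std::printf("final base: %#llx\n", (unsigned long long)Base);  // 0x1060
    return 0;
  }

The IsLast flag introduced in PATCH 10 then only decides whether the final access may kill the base register and the cached VLENB value.
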